IMG_Worker/modules/ocr_module.py

# -*- coding: utf-8 -*-
"""
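OCR module - text detection using PaddleOCR3.
Text regions are reported as polygons by default; detect_text() can also
return bbox, expanded_bbox, rotated_bbox and contour representations.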
"""

import cv2
import numpy as np
import os
import logging
from typing import List, Dict, Any

class OCRModule:
    def __init__(self, logger=None, base_dir=None, gpu_manager=None, force_cpu=False):
        """Prepare the process environment and build the OCR engine.

        Args:
            logger: Optional project logger exposing
                ``log(message, level=..., exc_info=...)``. Nothing is logged
                here when it is falsy.
            base_dir: Directory that ``initialize_ocr`` joins with
                ``modules/PP_Models/{rec,det,cls}`` to locate the models.
            gpu_manager: Optional object whose ``can_use_cuda`` attribute
                says whether CUDA may be used.
            force_cpu: When true, CUDA is hidden from this process even if
                ``gpu_manager`` allows it.

        Raises:
            RuntimeError: If ``initialize_ocr`` returns ``None``. Exceptions
                raised by ``initialize_ocr`` itself propagate unchanged.
        """
        self.logger = logger
        self.base_dir = base_dir
        self.gpu_manager = gpu_manager
        self.force_cpu = force_cpu  # forces CPU-only operation

        def log(message, level=logging.INFO):
            # The logger is optional, so every message goes through this guard.
            if self.logger:
                self.logger.log(message, level=level)

        # Device selection is communicated through CUDA_VISIBLE_DEVICES.
        if force_cpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = ''
            log("⚠️ OCR 모듈 CPU 강제 모드 (CUDNN 버전 불일치 회피)", logging.WARNING)
        elif gpu_manager and gpu_manager.can_use_cuda:
            log("OCR 모듈 CUDA 모드 활성화")
            # Remove any restriction so every GPU stays visible.
            os.environ.pop('CUDA_VISIBLE_DEVICES', None)
        else:
            os.environ['CUDA_VISIBLE_DEVICES'] = ''
            log("OCR 모듈 CPU 모드로 설정")

        # Disable OpenCV's OpenCL path (guards against memory-primitive
        # errors). Best effort: a failure here must not block start-up.
        try:
            cv2.ocl.setUseOpenCL(False)
        except Exception as ocl_error:
            log(f"OpenCV OpenCL 비활성화 실패 (무시): {ocl_error}", logging.DEBUG)

        # Keep the numeric libraries single-threaded to avoid errors when the
        # module is used from multiple threads.
        for thread_var in (
            "OMP_NUM_THREADS",
            "OPENBLAS_NUM_THREADS",
            "MKL_NUM_THREADS",
            "NUMEXPR_NUM_THREADS",
        ):
            os.environ[thread_var] = "1"
        os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

        self.ocr = None
        self.ocr = self.initialize_ocr()
        if self.ocr is None:
            # No exception is in flight here, so no traceback is requested.
            log("PaddleOCR 초기화 실패", logging.ERROR)
            raise RuntimeError("PaddleOCR 초기화 실패")

        # force_cpu always wins over what the GPU manager reports.
        cuda_enabled = (
            not self.force_cpu
            and self.gpu_manager
            and self.gpu_manager.can_use_cuda
        )
        cuda_status = "CUDA" if cuda_enabled else "CPU"
        log(f"✅ PaddleOCR 초기화 성공 ({cuda_status} 모드)")

    def initialize_ocr(self):
        """Create the OCR engine that matches the CPU architecture.

        The model directories are derived from ``self.base_dir`` and stored
        on the instance. On ARM ``_initialize_arm_ocr`` is tried first and
        ``_initialize_x86_ocr`` serves as its fallback; on every other
        architecture ``_initialize_x86_ocr`` is called exactly once, because
        retrying the identical call is not a fallback.

        Returns:
            The object returned by the selected initializer; this may be
            ``None`` when that initializer reports failure that way.

        Raises:
            Exception: Whatever the last attempted initializer raised.
        """
        def log(message, level=logging.INFO):
            if self.logger:
                self.logger.log(message, level=level)

        # Model directories
        models_root = os.path.join(self.base_dir, "modules", "PP_Models")
        self.rec_model_dir = os.path.join(models_root, "rec")
        self.det_model_dir = os.path.join(models_root, "det")
        self.cls_model_dir = os.path.join(models_root, "cls")

        # Architecture detection: prefer the project helper, fall back to the
        # standard library when it cannot be imported.
        try:
            from src.utils.platform_utils import is_arm, get_architecture
            is_arm_arch = is_arm()
            arch_name = get_architecture()
        except ImportError:
            import platform
            arch_name = platform.machine().lower()
            is_arm_arch = arch_name in ('arm64', 'aarch64', 'arm', 'armv7l')
        log(f"🔍 감지된 아키텍처: {arch_name} ({'ARM' if is_arm_arch else 'x86'})")

        # GPU decision (the force_cpu flag has priority). Always a real bool
        # so backends never receive None or an arbitrary truthy object.
        if self.force_cpu:
            use_gpu_for_ocr = False
            log("🚫 CPU 강제 모드: GPU 사용 비활성화", logging.WARNING)
        else:
            use_gpu_for_ocr = bool(self.gpu_manager and self.gpu_manager.can_use_cuda)

        if not is_arm_arch:
            # x86: the stock PaddleOCR path is the only candidate.
            try:
                return self._initialize_x86_ocr(use_gpu_for_ocr)
            except Exception as e:
                log(f"OCR 초기화 실패: {e}", logging.ERROR)
                raise

        # ARM: alternative backends first, stock PaddleOCR as the fallback.
        try:
            return self._initialize_arm_ocr(use_gpu_for_ocr)
        except Exception as e:
            log(f"OCR 초기화 실패, 폴백 시도: {e}", logging.WARNING)

        try:
            return self._initialize_x86_ocr(use_gpu_for_ocr)
        except Exception as fallback_e:
            log(f"폴백 OCR 초기화도 실패: {fallback_e}", logging.ERROR)
            raise

    def _initialize_arm_ocr(self, use_gpu):
        """Build an OCR backend for ARM, trying candidates in priority order.

        ``FastDeployOCR`` is attempted first with the requested ``use_gpu``,
        then ``ONNXRuntimeOCR`` (always created with ``use_gpu=False``). If
        both fail, the result of ``_initialize_x86_ocr(False)`` is returned.
        A model directory is only passed to a backend when it exists on disk;
        otherwise ``None`` is passed in its place.

        Args:
            use_gpu: GPU flag forwarded to the FastDeploy backend.

        Returns:
            The first backend that could be constructed, or whatever
            ``_initialize_x86_ocr(False)`` returns.
        """
        def emit(message, level):
            if self.logger:
                self.logger.log(message, level=level)

        def existing_or_none(path):
            return path if os.path.exists(path) else None

        def backend_options(gpu_flag):
            # Keyword arguments shared by both alternative backends.
            return dict(
                use_gpu=gpu_flag,
                use_angle_cls=True,
                lang='ch',
                det_model_dir=existing_or_none(self.det_model_dir),
                rec_model_dir=existing_or_none(self.rec_model_dir),
                cls_model_dir=existing_or_none(self.cls_model_dir),
                logger=self.logger,
            )

        emit("🔧 ARM 최적화 OCR 백엔드 초기화 중...", logging.INFO)

        # Priority: FastDeploy > ONNX Runtime > PaddleOCR

        # Candidate 1: FastDeploy
        try:
            from src.modules.ocr_backends import FastDeployOCR

            engine = FastDeployOCR(**backend_options(use_gpu))
            emit("✅ FastDeploy OCR 백엔드 사용 (ARM 최적화)", logging.INFO)
            return engine
        except Exception as exc:
            emit(f"FastDeploy 초기화 실패: {exc}", logging.WARNING)

        # Candidate 2: ONNX Runtime (CPU is recommended on ARM)
        try:
            from src.modules.ocr_backends import ONNXRuntimeOCR

            engine = ONNXRuntimeOCR(**backend_options(False))
            emit("✅ ONNX Runtime OCR 백엔드 사용 (ARM 최적화)", logging.INFO)
            return engine
        except Exception as exc:
            emit(f"ONNX Runtime 초기화 실패: {exc}", logging.WARNING)

        # Candidate 3: stock PaddleOCR on CPU only (may be slower)
        emit("⚠️ ARM에서 PaddleOCR 폴백 사용 (성능 저하 가능)", logging.WARNING)
        return self._initialize_x86_ocr(False)

    def _initialize_x86_ocr(self, use_gpu):
        """Create a PaddleOCR engine, preferring the GPU when requested.

        With a truthy ``use_gpu`` a GPU engine is built and smoke-tested on a
        blank 100x100 image. If building or testing fails with a message that
        mentions a CUDA-related keyword, a CPU engine is built instead (and
        ``CUDA_VISIBLE_DEVICES`` is cleared). Every other failure is logged
        and reported by returning ``None``.

        Args:
            use_gpu: Whether to attempt GPU mode first.

        Returns:
            The PaddleOCR instance, or ``None`` if initialization failed.
        """
        def emit(message, level, **extra):
            if self.logger:
                self.logger.log(message, level=level, **extra)

        def mentions(text, keywords):
            return any(word in text for word in keywords)

        try:
            from paddleocr import PaddleOCR

            def build_engine(gpu_enabled):
                # Both modes share every option except the device flag.
                return PaddleOCR(
                    use_gpu=gpu_enabled,
                    use_angle_cls=True,
                    lang="ch",
                    det_model_dir=self.det_model_dir,
                    rec_model_dir=self.rec_model_dir,
                    cls_model_dir=self.cls_model_dir,
                    rec_batch_num=4,
                    cpu_threads=2,
                )

            # GPU attempt
            if use_gpu:
                try:
                    emit("🔄 PaddleOCR GPU 모드 초기화 시도...", logging.INFO)
                    ocr = build_engine(True)

                    # Smoke test: run the engine once on a blank image.
                    try:
                        blank = np.zeros((100, 100, 3), dtype=np.uint8)
                        ocr.ocr(blank)
                        emit("✅ PaddleOCR GPU 모드 초기화 성공", logging.INFO)
                        return ocr
                    except Exception as test_error:
                        if not mentions(str(test_error).lower(), ('cudnn', 'cuda', 'gpu')):
                            raise
                        emit(
                            f"⚠️ GPU 테스트 실패 (CUDA 라이브러리 문제), CPU 모드로 전환: {test_error}",
                            logging.WARNING,
                        )
                        del ocr  # drop the GPU engine before building the CPU one
                        use_gpu = False

                except Exception as gpu_error:
                    if not mentions(
                        str(gpu_error).lower(), ('cudnn', 'cuda', 'preconditionnotmet')
                    ):
                        raise
                    emit(
                        f"⚠️ GPU 초기화 실패 (CUDA 라이브러리 문제), CPU 모드로 전환: {gpu_error}",
                        logging.WARNING,
                    )
                    use_gpu = False

            # CPU attempt (requested directly or reached through the fallback)
            if not use_gpu:
                emit("🔄 PaddleOCR CPU 모드 초기화...", logging.INFO)

                # Hide CUDA devices from the process for CPU-only operation.
                os.environ['CUDA_VISIBLE_DEVICES'] = ''

                ocr = build_engine(False)
                emit("✅ PaddleOCR CPU 모드 초기화 성공", logging.INFO)

            return ocr

        except Exception as e:
            emit(f"❌ PaddleOCR 초기화 실패: {e}", logging.ERROR, exc_info=True)
            return None


    def detect_text(self, image_path: str, method: str = 'polygon', raise_on_memory_error: bool = False) -> List[Dict[str, Any]]:
        """Detect text in an image file and return one record per text line.

        The image is shrunk before recognition when its longer side exceeds
        2000 px, and shrunk again (to 1/2, then 1/4) when recognition fails
        with a memory-related error. Polygons reported by the engine are
        mapped back to the pixel grid of the file on disk before the records
        are built, so coordinates never depend on that internal shrinking.

        Args:
            image_path: Path of the image file to read.
            method: Region representation: 'polygon', 'bbox',
                'expanded_bbox', 'rotated_bbox' or 'contour'. Any other value
                falls back to 'polygon' with a warning.
            raise_on_memory_error: When true, memory/primitive failures are
                raised as ``MemoryError`` instead of yielding an empty list.

        Returns:
            A list of dicts with the keys ``text``, ``confidence``,
            ``polygon``, ``bbox`` (x, y, w, h) and ``method`` (some methods
            add extra keys). The list is empty when the file is missing or
            unreadable, when nothing was recognised, or when an error was
            handled.

        Raises:
            MemoryError: Only when ``raise_on_memory_error`` is true and a
                memory/primitive failure occurred.
        """
        import gc

        def log(message, level=logging.INFO, **extra):
            # The logger is optional (the constructor default is None).
            if self.logger:
                self.logger.log(message, level=level, **extra)

        if not os.path.exists(image_path):
            log(f"이미지 파일을 찾을 수 없습니다: {image_path}", level=logging.ERROR)
            return []

        image = None
        try:
            image = cv2.imread(image_path)
            if image is None:
                log(f"이미지를 읽을 수 없습니다: {image_path}", level=logging.ERROR)
                return []

            log(f"🔍 OCR 감지 방식: {method}")

            # Size of the file on disk; results are reported in this space.
            orig_h, orig_w = image.shape[:2]

            # ---- Memory safety: cap the longer side of the OCR input -------
            # Only ever shrink: an image that already fits is left untouched,
            # whatever its aspect ratio, and no side may collapse to 0 px.
            max_dim_safe = 2000
            longest_side = max(orig_w, orig_h)
            if longest_side > max_dim_safe:
                try:
                    scale = float(max_dim_safe) / float(longest_side)
                    new_size = (max(1, int(orig_w * scale)), max(1, int(orig_h * scale)))
                    aspect_ratio = longest_side / max(1, min(orig_w, orig_h))
                    log(
                        f"⚖️ OCR용 다운스케일 ({orig_w}x{orig_h}) -> {new_size} (ratio={aspect_ratio:.1f})"
                    )
                    image = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA).copy()
                except Exception as e:
                    log(f"OCR 입력 스케일링 중 오류: {e}", level=logging.WARNING)

            # Size of the array that is actually handed to the engine.
            ocr_h, ocr_w = image.shape[:2]

            # ---- Run OCR ----------------------------------------------------
            try:
                ocr_raw_results = self.ocr.ocr(image)
            except Exception as e:
                err_msg = str(e).lower()

                # CUDA/cuDNN failures cannot be fixed by retrying here.
                cuda_signals = ('cudnn', 'cuda', 'gpu', 'preconditionnotmet')
                if any(s in err_msg for s in cuda_signals):
                    log(f"⚠️ OCR 처리 중 CUDA 오류 발생: {e}", level=logging.WARNING)
                    log(
                        "💡 PaddleOCR이 GPU 모드로 초기화되었지만 실행 중 CUDA 라이브러리 문제가 발생했습니다. "
                        "프로그램을 재시작하면 자동으로 CPU 모드로 전환됩니다.",
                        level=logging.WARNING,
                    )
                    return []

                mem_signals = (
                    'unable to allocate',
                    'out of memory',
                    'could not create a memory object',
                    'create a primitive',
                    'cv::outofmemoryerror',
                )
                if not (isinstance(e, MemoryError) or any(s in err_msg for s in mem_signals)):
                    raise

                # Memory failure: retry on progressively smaller copies.
                log(
                    f"🔁 OCR 메모리 오류 재시도: 이미지 1/2 축소 ({ocr_w}x{ocr_h}) → ({ocr_w // 2}x{ocr_h // 2})",
                    level=logging.WARNING,
                )
                full_w, full_h = ocr_w, ocr_h
                divisors = (2, 4)
                shrunk = None
                for attempt, divisor in enumerate(divisors):
                    try:
                        shrunk = cv2.resize(
                            image,
                            (max(1, full_w // divisor), max(1, full_h // divisor)),
                            interpolation=cv2.INTER_AREA,
                        ).copy()
                        ocr_raw_results = self.ocr.ocr(shrunk)
                        ocr_h, ocr_w = shrunk.shape[:2]
                        shrunk = None
                        break
                    except Exception as retry_error:
                        shrunk = None
                        if attempt == len(divisors) - 1:
                            log(
                                f"❌ OCR 재시도 실패: {retry_error}",
                                level=logging.ERROR,
                                exc_info=True,
                            )
                            if raise_on_memory_error:
                                raise MemoryError(
                                    f"OCR memory/primitive failure after retries: {retry_error}"
                                ) from retry_error
                            return []
                        # Free what we can before the next, smaller attempt.
                        gc.collect()
                        log(f"🔁 2차 재시도(1/4 축소): {retry_error}", level=logging.WARNING)

            if not ocr_raw_results or all(page is None for page in ocr_raw_results):
                log(
                    "⚠️ OCR 결과가 비어있거나 모두 None입니다. 중국어 텍스트가 없는 이미지입니다.",
                    level=logging.WARNING,
                )
                return []

            log(f"ocr_raw_results: {ocr_raw_results}")

            # ---- Parse paddleocr 2.x results --------------------------------
            # Each page is a list of [polygon, (text, score)]; a page without
            # text is None and is skipped.
            needs_rescale = (ocr_w, ocr_h) != (orig_w, orig_h)
            scale_back = (orig_w / float(ocr_w), orig_h / float(ocr_h))
            converted_results = []
            for page in ocr_raw_results:
                if page is None:
                    continue
                for line in page:
                    poly = line[0]
                    text = line[1][0]
                    score = line[1][1]
                    if needs_rescale:
                        # Map engine coordinates back onto the original image.
                        poly = (np.asarray(poly, dtype=np.float64) * scale_back).tolist()
                    converted_results.append([poly, [text, score]])

            if needs_rescale:
                # The region builders only read height/width from the image;
                # a zero-channel array carries the original size without
                # holding any pixels (and releases the shrunk copy).
                image = np.empty((orig_h, orig_w, 0), dtype=np.uint8)

            # ---- Build the records with the requested representation -------
            detectors = {
                'polygon': self._detect_with_polygon,
                'bbox': self._detect_with_bbox,
                'expanded_bbox': self._detect_with_expanded_bbox,
                'rotated_bbox': self._detect_with_rotated_bbox,
                'contour': self._detect_with_contour,
            }
            detector = detectors.get(method)
            if detector is None:
                log(
                    f"⚠️ 지원하지 않는 감지 방식: {method}, 기본 polygon 방식 사용",
                    level=logging.WARNING,
                )
                detector = self._detect_with_polygon

            return detector(image, converted_results)

        except Exception as e:
            log(f"OCR 처리 중 오류: {e}", level=logging.ERROR, exc_info=True)
            if raise_on_memory_error:
                if isinstance(e, MemoryError):
                    # Already the right type; keep its message and traceback.
                    raise
                lowered = str(e).lower()
                if 'memory' in lowered or 'primitive' in lowered:
                    raise MemoryError(f"OCR memory/primitive error: {e}") from e
            return []
        finally:
            # Release the pixel buffer explicitly. No HighGUI call is made
            # here: this method never opens a window, and a GUI call inside
            # ``finally`` could replace the result with an exception on
            # OpenCV builds without GUI support.
            image = None
            gc.collect()

    def filter_chinese_text(self, ocr_results: List[Dict]) -> List[Dict]:
        """Keep only the OCR records whose text contains a Chinese character.

        A record qualifies when at least one character of its ``'text'``
        value lies in the range U+4E00..U+9FFF (simplified and traditional
        forms share this range).

        Args:
            ocr_results: OCR records; each must provide a ``'text'`` key.

        Returns:
            The qualifying records, in their original order.
        """
        chinese_results = [
            result
            for result in ocr_results
            if any('\u4e00' <= char <= '\u9fff' for char in result['text'])
        ]

        # The logger is optional (the constructor default is None).
        if self.logger:
            self.logger.log(f"중국어 텍스트 {len(chinese_results)}개 필터링 완료", level=logging.INFO)
        return chinese_results

    def filter_korean_text(self, ocr_results: List[Dict]) -> List[Dict]:
        """Keep only the OCR records whose text contains a Hangul syllable.

        A record qualifies when at least one character of its ``'text'``
        value lies in the range U+AC00..U+D7A3.

        Args:
            ocr_results: OCR records; each must provide a ``'text'`` key.

        Returns:
            The qualifying records, in their original order.
        """
        korean_results = [
            result
            for result in ocr_results
            if any('\uac00' <= char <= '\ud7a3' for char in result['text'])
        ]

        # The logger is optional (the constructor default is None).
        if self.logger:
            self.logger.log(f"한글 텍스트 {len(korean_results)}개 필터링 완료", level=logging.INFO)
        return korean_results

    def _detect_with_polygon(self, image: np.ndarray, ocr_raw_results: List) -> List[Dict[str, Any]]:
        """Build records that keep the engine's polygon as-is (default method).

        Args:
            image: Not read by this method.
            ocr_raw_results: Entries shaped ``[polygon, [text, confidence]]``;
                entries or text parts with fewer than two items are skipped.

        Returns:
            One dict per accepted entry with the original polygon and its
            axis-aligned bounding box ``(x, y, w, h)``.
        """
        records = []

        for entry in ocr_raw_results:
            if len(entry) < 2:
                continue
            points, recognition = entry[0], entry[1]
            if len(recognition) < 2:
                continue

            # Bounding box of the polygon after conversion to integer points.
            left, top, width, height = cv2.boundingRect(np.array(points, dtype=np.int32))

            records.append({
                'text': recognition[0],
                'confidence': recognition[1],
                'polygon': points,
                'bbox': (left, top, width, height),
                'method': 'polygon',
            })

        return records

    def _detect_with_bbox(self, image: np.ndarray, ocr_raw_results: List) -> List[Dict[str, Any]]:
        """Build records whose polygon is the axis-aligned bounding rectangle.

        Args:
            image: Not read by this method.
            ocr_raw_results: Entries shaped ``[polygon, [text, confidence]]``;
                entries or text parts with fewer than two items are skipped.

        Returns:
            One dict per accepted entry; ``polygon`` holds the four corners
            of the bounding box and ``bbox`` the same box as ``(x, y, w, h)``.
        """
        records = []

        for entry in ocr_raw_results:
            if len(entry) < 2:
                continue
            points, recognition = entry[0], entry[1]
            if len(recognition) < 2:
                continue

            left, top, width, height = cv2.boundingRect(np.array(points, dtype=np.int32))
            right = left + width
            bottom = top + height

            # Corners in the order top-left, top-right, bottom-right, bottom-left.
            corners = [[left, top], [right, top], [right, bottom], [left, bottom]]

            records.append({
                'text': recognition[0],
                'confidence': recognition[1],
                'polygon': corners,
                'bbox': (left, top, width, height),
                'method': 'bbox',
            })

        return records

    def _detect_with_expanded_bbox(self, image: np.ndarray, ocr_raw_results: List) -> List[Dict[str, Any]]:
        """Build records whose box is the bounding box grown by 20% per side.

        Each side is padded by 20% of the box width/height (at least 1 px)
        and every edge is then clamped to the image independently, so
        clipping at one edge never pushes the opposite edge further out and
        the resulting width/height can never be negative.

        Args:
            image: Array whose first two shape entries (height, width) bound
                the expanded boxes.
            ocr_raw_results: Entries shaped ``[polygon, [text, confidence]]``;
                entries or text parts with fewer than two items are skipped.

        Returns:
            One dict per accepted entry; ``polygon`` holds the four corners
            of the expanded box and ``bbox`` the same box as ``(x, y, w, h)``.
        """
        ocr_results = []
        h_img, w_img = image.shape[:2]

        for line in ocr_raw_results:
            if len(line) < 2:
                continue
            polygon, text_info = line[0], line[1]
            if len(text_info) < 2:
                continue

            # Plain bounding box of the polygon
            x, y, w, h = cv2.boundingRect(np.array(polygon, dtype=np.int32))

            # Padding: 20% of the text size, never less than one pixel
            expand_x = max(1, int(w * 0.2))
            expand_y = max(1, int(h * 0.2))

            # Clamp the four edges separately, then derive the size.
            x_exp = min(max(0, x - expand_x), w_img)
            y_exp = min(max(0, y - expand_y), h_img)
            right = max(x_exp, min(w_img, x + w + expand_x))
            bottom = max(y_exp, min(h_img, y + h + expand_y))
            w_exp = right - x_exp
            h_exp = bottom - y_exp

            expanded_polygon = [
                [x_exp, y_exp],
                [right, y_exp],
                [right, bottom],
                [x_exp, bottom],
            ]

            ocr_results.append({
                'text': text_info[0],
                'confidence': text_info[1],
                'polygon': expanded_polygon,
                'bbox': (x_exp, y_exp, w_exp, h_exp),
                'method': 'expanded_bbox',
            })

        return ocr_results

    def _detect_with_rotated_bbox(self, image: np.ndarray, ocr_raw_results: List) -> List[Dict[str, Any]]:
        """Build records whose polygon is the minimum-area rotated rectangle.

        Args:
            image: Not read by this method.
            ocr_raw_results: Entries shaped ``[polygon, [text, confidence]]``;
                entries or text parts with fewer than two items are skipped.

        Returns:
            One dict per accepted entry; ``polygon`` holds the integer
            corners of the rotated rectangle, ``bbox`` the axis-aligned box
            ``(x, y, w, h)`` and ``rotation_info`` the center, size and angle
            reported by ``cv2.minAreaRect``.
        """
        records = []

        for entry in ocr_raw_results:
            if len(entry) < 2:
                continue
            points, recognition = entry[0], entry[1]
            if len(recognition) < 2:
                continue

            points_f32 = np.array(points, dtype=np.float32)

            # Rotated rectangle and its four corners as integers
            min_rect = cv2.minAreaRect(points_f32)
            corners = cv2.boxPoints(min_rect).astype(np.int32)

            # Axis-aligned box of the same points
            left, top, width, height = cv2.boundingRect(points_f32.astype(np.int32))

            records.append({
                'text': recognition[0],
                'confidence': recognition[1],
                'polygon': corners.tolist(),
                'bbox': (left, top, width, height),
                'method': 'rotated_bbox',
                'rotation_info': {
                    'center': min_rect[0],
                    'size': min_rect[1],
                    'angle': min_rect[2],
                },
            })

        return records

    def _detect_with_contour(self, image: np.ndarray, ocr_raw_results: List) -> List[Dict[str, Any]]:
        """Build records whose polygon is a simplified closed contour.

        The polygon is approximated with ``cv2.approxPolyDP`` using a
        tolerance of 2% of its closed perimeter.

        Args:
            image: Not read by this method.
            ocr_raw_results: Entries shaped ``[polygon, [text, confidence]]``;
                entries or text parts with fewer than two items are skipped.

        Returns:
            One dict per accepted entry; ``polygon`` holds the approximated
            points, ``bbox`` the axis-aligned box ``(x, y, w, h)`` of the
            original polygon and ``contour_points`` the number of
            approximated points.
        """
        records = []

        for entry in ocr_raw_results:
            if len(entry) < 2:
                continue
            points, recognition = entry[0], entry[1]
            if len(recognition) < 2:
                continue

            contour = np.array(points, dtype=np.int32)

            # Simplify the closed contour, then flatten it to [x, y] pairs.
            tolerance = 0.02 * cv2.arcLength(contour, True)
            simplified = cv2.approxPolyDP(contour, tolerance, True).reshape(-1, 2).tolist()

            left, top, width, height = cv2.boundingRect(contour)

            records.append({
                'text': recognition[0],
                'confidence': recognition[1],
                'polygon': simplified,
                'bbox': (left, top, width, height),
                'method': 'contour',
                'contour_points': len(simplified),
            })

        return records