# IMG_Worker/modules/ocr_backends/fastdeploy_ocr.py

"""
FastDeploy 기반 OCR 백엔드
ARM 아키텍처에서 PaddleOCR 모델을 효율적으로 실행
"""

import os
import cv2
import numpy as np
import logging
from typing import List, Tuple, Dict, Any

class FastDeployOCR:
    """PaddleOCR-compatible OCR wrapper backed by FastDeploy models.

    The class loads up to three FastDeploy models (text detection, text
    recognition and, optionally, angle classification) and exposes an
    ``ocr()`` method whose result layout follows PaddleOCR:
    ``[[bbox, (text, confidence)], ...]``.

    NOTE(review): the standalone FastDeploy Python models appear to return
    plain values (a list of boxes, ``(text, score)``, ``(label, score)``)
    rather than result objects; both shapes are accepted here. Confirm
    against the FastDeploy version that is actually installed.
    """

    # Number of CPU threads requested when running without a GPU.
    _CPU_THREADS = 4
    # Minimum classifier confidence before a crop is flipped by 180 degrees.
    # Presumably matches the usual PaddleOCR ``cls_thresh`` default - confirm.
    _CLS_THRESHOLD = 0.9
    # File names expected inside every exported Paddle inference model dir.
    _MODEL_FILE = "inference.pdmodel"
    _PARAMS_FILE = "inference.pdiparams"

    def __init__(self, use_gpu=False, use_angle_cls=True, lang='ch',
                 det_model_dir=None, rec_model_dir=None, cls_model_dir=None,
                 logger=None, **kwargs):
        """Create the runtime options and load the available models.

        Args:
            use_gpu: Run inference on the GPU instead of the CPU.
            use_angle_cls: Load the angle classifier when its files exist.
            lang: Language tag; stored but not otherwise used by this class.
            det_model_dir: Directory holding the detection model files.
            rec_model_dir: Directory holding the recognition model files.
            cls_model_dir: Directory holding the classifier model files.
            logger: Optional project logger exposing ``log(msg, level=...)``.
            **kwargs: Extra PaddleOCR-style options. Only
                ``rec_char_dict_path`` is read; when given it is forwarded to
                the recognizer as its label file. Everything else is ignored.

        Raises:
            ImportError: If the ``fastdeploy`` package cannot be imported.
            Exception: Any error raised while configuring the runtime or
                loading models is logged and re-raised unchanged.
        """
        self.logger = logger
        self.use_gpu = use_gpu
        self.use_angle_cls = use_angle_cls
        self.lang = lang
        self.rec_label_path = kwargs.get("rec_char_dict_path")

        # Models stay None until their files are found and loaded.
        self.det_model = None
        self.rec_model = None
        self.cls_model = None

        # Keep the import in its own try so that only a genuinely missing
        # package is reported as "not installed".
        try:
            import fastdeploy as fd
        except ImportError as err:
            self._log("❌ FastDeploy 모듈을 찾을 수 없습니다", level=logging.ERROR)
            raise ImportError(
                "FastDeploy가 설치되지 않았습니다: pip install fastdeploy-cpu"
            ) from err
        self.fd = fd

        try:
            self.runtime_option = fd.RuntimeOption()
            if use_gpu:
                self.runtime_option.use_gpu()
            else:
                self.runtime_option.use_cpu()
                self.runtime_option.set_cpu_thread_num(self._CPU_THREADS)

            self._initialize_models(det_model_dir, rec_model_dir, cls_model_dir)
            self._log("✅ FastDeploy OCR 초기화 성공 (ARM 최적화)", level=logging.INFO)
        except Exception as e:
            self._log(f"❌ FastDeploy OCR 초기화 실패: {e}", level=logging.ERROR)
            raise

    def _log(self, message, level=logging.INFO) -> None:
        """Forward ``message`` to the injected logger, if one was provided."""
        if self.logger:
            self.logger.log(message, level=level)

    def _load_model(self, factory, model_dir, label, **extra):
        """Build one model from ``model_dir`` or return None if unavailable.

        Args:
            factory: Model class to call with
                ``(model_file, params_file, runtime_option=..., **extra)``.
            model_dir: Directory expected to contain both model files; may be
                None or empty when the model was not requested.
            label: Short model name used only in the warning message.
            **extra: Additional keyword arguments passed to ``factory``.

        Returns:
            The constructed model, or None when no directory was given or
            either model file is missing.
        """
        if not model_dir:
            return None
        model_file = os.path.join(model_dir, self._MODEL_FILE)
        params_file = os.path.join(model_dir, self._PARAMS_FILE)
        if not (os.path.exists(model_file) and os.path.exists(params_file)):
            # A directory was requested but is unusable: say so instead of
            # leaving the model silently disabled.
            self._log(
                f"⚠️ FastDeploy {label} 모델 파일을 찾을 수 없습니다: {model_dir}",
                level=logging.WARNING,
            )
            return None
        return factory(
            model_file, params_file, runtime_option=self.runtime_option, **extra
        )

    def _initialize_models(self, det_model_dir, rec_model_dir, cls_model_dir):
        """Load the detection, recognition and classification models.

        A model attribute is only overwritten when its files were found, so a
        missing directory leaves the previous value (initially None) in place.

        Raises:
            Exception: Any error raised by FastDeploy while constructing a
                model is logged and re-raised unchanged.
        """
        try:
            ocr_api = self.fd.vision.ocr

            rec_extra = {}
            label_path = getattr(self, "rec_label_path", None)
            if label_path:
                # NOTE(review): assumes the Recognizer keyword is
                # ``label_path`` - confirm against the FastDeploy API.
                rec_extra["label_path"] = label_path

            specs = [
                ("det_model", ocr_api.DBDetector, det_model_dir, "detection", {}),
                ("rec_model", ocr_api.Recognizer, rec_model_dir, "recognition",
                 rec_extra),
            ]
            if self.use_angle_cls:
                # The classifier is optional and only loaded on request.
                specs.append(
                    ("cls_model", ocr_api.Classifier, cls_model_dir,
                     "classification", {})
                )

            for attr, factory, model_dir, label, extra in specs:
                model = self._load_model(factory, model_dir, label, **extra)
                if model is not None:
                    setattr(self, attr, model)

        except Exception as e:
            self._log(f"FastDeploy 모델 초기화 실패: {e}", level=logging.ERROR)
            raise

    @staticmethod
    def _to_bgr_array(img):
        """Convert a PIL-like image or a file path into a BGR numpy array.

        Objects with a ``save`` attribute are treated as PIL images; strings
        are treated as file paths. Anything else is returned untouched.
        Returns None when ``cv2.imread`` cannot read the given path.
        """
        if hasattr(img, 'save'):
            if hasattr(img, 'convert'):
                # Palette / grayscale / alpha images would otherwise break or
                # skew the RGB -> BGR conversion below.
                img = img.convert("RGB")
            return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        if isinstance(img, str):
            return cv2.imread(img)
        return img

    @staticmethod
    def _extract_boxes(det_result) -> List[Any]:
        """Normalise a detector result into a list of ``(N, 2)`` point arrays.

        Accepts either an object exposing ``.boxes`` or a plain sequence of
        boxes; each box may be an ``(N, 2)`` array or a flat ``x0, y0, x1,
        y1, ...`` sequence. Empty boxes are dropped.
        """
        raw_boxes = getattr(det_result, "boxes", det_result)
        if raw_boxes is None:
            return []
        boxes = []
        for raw in raw_boxes:
            points = np.asarray(raw).reshape(-1, 2)
            if points.size:
                boxes.append(points)
        return boxes

    @staticmethod
    def _parse_rec_result(rec_result):
        """Return ``(text, score)`` from a recognizer result, or None.

        Supports result objects with ``.text`` / ``.score`` attributes as
        well as two-element ``(text, score)`` sequences.
        """
        if hasattr(rec_result, 'text') and hasattr(rec_result, 'score'):
            return (rec_result.text, rec_result.score)
        if isinstance(rec_result, (tuple, list)) and len(rec_result) == 2:
            text, score = rec_result
            return (str(text), float(score))
        return None

    def _is_upside_down(self, cls_result) -> bool:
        """Tell whether a classifier result asks for a 180 degree flip.

        Only two-element ``(label, score)`` sequences are understood; an odd
        label with a score above ``_CLS_THRESHOLD`` means "rotated".
        NOTE(review): label semantics assumed from the PP-OCR convention
        (label 1 == 180 degrees) - confirm. Unknown shapes never rotate.
        """
        if not isinstance(cls_result, (tuple, list)) or len(cls_result) != 2:
            return False
        label, score = cls_result
        try:
            return int(label) % 2 == 1 and float(score) > self._CLS_THRESHOLD
        except (TypeError, ValueError):
            return False

    def _recognize_whole_image(self, img) -> List[Any]:
        """Run recognition on the full image and wrap it in a full-frame box."""
        parsed = self._parse_rec_result(self.rec_model.predict(img))
        if parsed is None:
            return []
        h, w = img.shape[:2]
        bbox = [[0, 0], [w, 0], [w, h], [0, h]]
        return [[bbox, parsed]]

    def _recognize_boxes(self, img, boxes, use_cls) -> List[Any]:
        """Crop every detected box, optionally un-rotate it, and recognise it."""
        height, width = img.shape[:2]
        results = []
        for box in boxes:
            xs, ys = box[:, 0], box[:, 1]
            # Clamp to the image: detectors may emit points slightly outside
            # the frame, and negative slice bounds would wrap around.
            x_min, x_max = (min(max(int(v), 0), width) for v in (xs.min(), xs.max()))
            y_min, y_max = (min(max(int(v), 0), height) for v in (ys.min(), ys.max()))

            text_region = img[y_min:y_max, x_min:x_max]
            if text_region.size == 0:
                continue

            if use_cls and self._is_upside_down(self.cls_model.predict(text_region)):
                # Reversing both axes is a 180 degree rotation; copy so the
                # recognizer receives a contiguous buffer.
                text_region = np.ascontiguousarray(text_region[::-1, ::-1])

            parsed = self._parse_rec_result(self.rec_model.predict(text_region))
            results.append([box.tolist(), parsed if parsed is not None else ("", 0.0)])
        return results

    def ocr(self, img, det=True, rec=True, cls=True):
        """Run OCR with a PaddleOCR-compatible call signature.

        Args:
            img: Input image as a numpy array, a PIL image or a file path.
            det: Run text detection (needs a loaded detection model).
            rec: Run text recognition (needs a loaded recognition model).
            cls: Apply the angle classifier to each crop before recognition
                (needs a loaded classifier).

        Returns:
            A list in PaddleOCR layout. With recognition each entry is
            ``[bbox, (text, confidence)]``; with detection only each entry is
            ``[bbox]``. ``bbox`` is a list of ``[x, y]`` points. When
            detection is skipped or unavailable the whole image is recognised
            and reported with a full-frame box. An empty list is returned
            when nothing is detected, when the image cannot be read, or when
            any error occurs (errors are logged, never raised).
        """
        try:
            if not det and not rec:
                return []

            img = self._to_bgr_array(img)
            if img is None:
                self._log(
                    "FastDeploy OCR 추론 실패: 이미지를 읽을 수 없습니다",
                    level=logging.ERROR,
                )
                return []

            # None means "detection did not run"; an empty list means it ran
            # and found nothing.
            boxes = None
            if det and self.det_model is not None:
                boxes = self._extract_boxes(self.det_model.predict(img))
                if not rec:
                    return [[box.tolist()] for box in boxes]
                if not boxes:
                    # No text found: do not fall back to recognising the
                    # whole frame, which would fabricate a result.
                    return []

            if not rec or self.rec_model is None:
                return []

            if boxes is None:
                return self._recognize_whole_image(img)

            use_cls = bool(cls) and self.cls_model is not None
            return self._recognize_boxes(img, boxes, use_cls)

        except Exception as e:
            # Deliberate best-effort contract: inference errors are logged
            # and reported to the caller as "no results".
            self._log(f"FastDeploy OCR 추론 실패: {e}", level=logging.ERROR)
            return []