# IMG_Worker/modules/text_rendering_module.py

# -*- coding: utf-8 -*-
"""
텍스트 렌더링 모듈 - 인페인팅된 이미지에 번역된 텍스트를 자연스럽게 렌더링 (라이브러리화)
"""

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from typing import List, Dict, Any, Tuple, Optional
import os
import math
import gc
import logging

class TextRenderingModule:
    def __init__(self, logger, font_path: Optional[str] = None):
        self.logger = logger
        self.font_path = font_path or self._setup_default_fonts()
        self.default_font_size = 20
        self.font_cache = {}
        self.logger.log("텍스트 렌더링 모듈 초기화 완료", level=logging.INFO)
        self.logger.log(f"기본 폰트: {self.font_path}", level=logging.INFO)

    def _setup_default_fonts(self):
        possible_fonts = [
            "/usr/share/fonts/truetype/nanum/NanumGothic.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "/System/Library/Fonts/AppleGothic.ttf",
            "C:/Windows/Fonts/malgun.ttf",
            "C:/Windows/Fonts/gulim.ttc"
        ]
        self.available_fonts = [f for f in possible_fonts if os.path.exists(f)]

        return self.available_fonts


    def get_font(self, size: int, font_path: Optional[str] = None) -> ImageFont.FreeTypeFont:
        font_path = font_path or self.font_path
        cache_key = f"{font_path}_{size}"
        if cache_key not in self.font_cache:
            try:
                if font_path and os.path.exists(font_path):
                    font = ImageFont.truetype(font_path, size)
                else:
                    font = ImageFont.load_default()
                self.font_cache[cache_key] = font
            except Exception as e:
                print(f"폰트 로드 오류: {e}")
                font = ImageFont.load_default()
                self.font_cache[cache_key] = font
        return self.font_cache[cache_key]

    def estimate_text_size(self, text: str, font_size: int, font_path: Optional[str] = None) -> Tuple[int, int]:
        font = self.get_font(font_size, font_path)
        try:
            bbox = font.getbbox(text)
            width = bbox[2] - bbox[0]
            height = bbox[3] - bbox[1]
        except AttributeError:
            width, height = font.getsize(text)
        return width, height

    def calculate_optimal_font_size(self, text: str, target_width: int, target_height: int, min_size: int = 8, max_size: int = 100, font_path: Optional[str] = None) -> int:
        best_size = min_size
        for size in range(min_size, max_size + 1):
            width, height = self.estimate_text_size(text, size, font_path)
            if width <= target_width and height <= target_height:
                best_size = size
            else:
                break
        return best_size

    def _estimate_background_color(self, image: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> Tuple[int, int, int]:
        margin = 5
        y1_exp = max(0, y1 - margin)
        y2_exp = min(image.shape[0], y2 + margin)
        x1_exp = max(0, x1 - margin)
        x2_exp = min(image.shape[1], x2 + margin)
        region = image[y1_exp:y2_exp, x1_exp:x2_exp]
        if region.size == 0:
            # ROI가 비었을 때는 흰색 배경 가정
            return (255, 255, 255)

        mean_color = np.mean(region, axis=(0, 1))
        if np.isnan(mean_color).any():
            return (255, 255, 255)

        return (int(mean_color[2]), int(mean_color[1]), int(mean_color[0]))

    def _get_contrasting_color(self, bg_color: Tuple[int, int, int]) -> Tuple[int, int, int]:
        brightness = (bg_color[0] * 0.299 + bg_color[1] * 0.587 + bg_color[2] * 0.114)
        if brightness > 128:
            return (0, 0, 0)
        else:
            return (255, 255, 255)

    def render_text(self, image: np.ndarray, ocr_results: List[Dict], translated_texts: List[str], font_path: Optional[str] = None) -> np.ndarray:
        result_image = image.copy()
        for i, (ocr_result, translated_text) in enumerate(zip(ocr_results, translated_texts)):
            polygon = ocr_result['polygon']
            polygon_array = np.array(polygon)
            x_coords = polygon_array[:, 0]
            y_coords = polygon_array[:, 1]
            x_min, x_max = int(np.min(x_coords)), int(np.max(x_coords))
            y_min, y_max = int(np.min(y_coords)), int(np.max(y_coords))
            width = x_max - x_min
            height = y_max - y_min
            optimal_font_size = self.calculate_optimal_font_size(translated_text, width, height, font_path=font_path)
            text_width, text_height = self.estimate_text_size(translated_text, optimal_font_size, font_path)
            center_x = (x_min + x_max) // 2
            center_y = (y_min + y_max) // 2
            text_x = center_x - text_width // 2
            text_y = center_y - text_height // 2
            angle = 0
            if len(polygon_array) >= 2:
                dx = polygon_array[1][0] - polygon_array[0][0]
                dy = polygon_array[1][1] - polygon_array[0][1]
                angle = math.degrees(math.atan2(dy, dx))
            bg_color = self._estimate_background_color(image, x_min, y_min, x_max, y_max)
            text_color = self._get_contrasting_color(bg_color)
            result_image = self.render_text_on_image(
                result_image, translated_text, (text_x, text_y),
                font_size=optimal_font_size,
                font_path=font_path,
                text_color=text_color,
                background_color=None,
                angle=angle
            )
        return result_image

    def render_text_on_image(self, image: np.ndarray, text: str, position: Tuple[int, int], font_size: Optional[int] = None, font_path: Optional[str] = None, text_color: Tuple[int, int, int] = (0, 0, 0), background_color: Optional[Tuple[int, int, int]] = None, angle: float = 0) -> np.ndarray:
        if font_size is None:
            font_size = self.default_font_size
        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(pil_image)
        font = self.get_font(font_size, font_path)
        text_width, text_height = self.estimate_text_size(text, font_size, font_path)
        if background_color is not None:
            bg_x1 = position[0] - 2
            bg_y1 = position[1] - 2
            bg_x2 = position[0] + text_width + 2
            bg_y2 = position[1] + text_height + 2
            draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill=background_color)
        if angle != 0:
            text_image = Image.new('RGBA', (text_width + 10, text_height + 10), (255, 255, 255, 0))
            text_draw = ImageDraw.Draw(text_image)
            text_draw.text((5, 5), text, font=font, fill=text_color + (255,))
            rotated_text = text_image.rotate(angle, expand=True)
            pil_image.paste(rotated_text, position, rotated_text)
        else:
            draw.text(position, text, font=font, fill=text_color)
        result_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
        return result_image

    def create_text_styles(self) -> Dict[str, Dict[str, Any]]:
        """다양한 텍스트 스타일 정의"""
        styles = {
            'default': {
                'color': (0, 0, 0),
                'bg_color': None,
                'outline': True,
                'outline_color': (255, 255, 255),
                'outline_width': 1
            },
            'bold': {
                'color': (0, 0, 0),
                'bg_color': (255, 255, 255),
                'outline': True,
                'outline_color': (128, 128, 128),
                'outline_width': 2
            },
            'highlight': {
                'color': (255, 255, 255),
                'bg_color': (255, 0, 0),
                'outline': False,
                'outline_color': None,
                'outline_width': 0
            },
            'subtle': {
                'color': (128, 128, 128),
                'bg_color': None,
                'outline': True,
                'outline_color': (255, 255, 255),
                'outline_width': 1
            }
        }

        return styles

    def render_with_style(self, image: np.ndarray, ocr_results: List[Dict],
                         translated_texts: List[str], style_name: str = 'default') -> np.ndarray:
        """스타일을 적용한 텍스트 렌더링"""
        styles = self.create_text_styles()

        if style_name not in styles:
            print(f"알 수 없는 스타일: {style_name}")
            style_name = 'default'

        style = styles[style_name]

        # 기본 렌더링 후 스타일 적용
        result = self.render_text(image, ocr_results, translated_texts)

        # 추가 스타일 처리는 여기서 구현
        # (예: 그림자, 글로우 효과 등)

        return result

    def adjust_text_for_space(self, text: str, max_width: int, max_height: int,
                            font_size: int) -> Tuple[str, int]:
        """
        공간에 맞게 텍스트 조정

        Args:
            text (str): 원본 텍스트
            max_width (int): 최대 너비
            max_height (int): 최대 높이
            font_size (int): 폰트 크기

        Returns:
            Tuple[str, int]: 조정된 텍스트와 폰트 크기
        """
        # 텍스트가 너무 길면 줄바꿈 또는 생략
        if len(text) > 20:
            # 긴 텍스트는 줄바꿈
            words = text.split(' ')
            if len(words) > 1:
                mid = len(words) // 2
                text = ' '.join(words[:mid]) + '\n' + ' '.join(words[mid:])
            else:
                # 단어가 하나면 생략
                text = text[:15] + '...'

        # 폰트 크기 조정
        adjusted_font_size = font_size
        while adjusted_font_size > 8:
            # 실제로는 텍스트 크기를 측정해서 비교
            estimated_width = len(text) * adjusted_font_size * 0.6
            if estimated_width <= max_width:
                break
            adjusted_font_size -= 2

        return text, adjusted_font_size

    def _create_style_comparison(self, images: List[np.ndarray], style_names: List[str]):
        """스타일 비교 이미지 생성"""
        if not images:
            return

        # 이미지 크기 조정
        target_width = 200
        target_height = int(images[0].shape[0] * target_width / images[0].shape[1])

        resized_images = []
        for img in images:
            resized = cv2.resize(img, (target_width, target_height))
            resized_images.append(resized)

        # 비교 이미지 생성
        num_images = len(resized_images)
        comparison_width = target_width * num_images
        comparison_height = target_height + 30

        comparison = np.ones((comparison_height, comparison_width, 3), dtype=np.uint8) * 255

        # 원본 이미지
        comparison[30:30+target_height, 0:target_width] = resized_images[0]
        cv2.putText(comparison, "Original", (10, 20),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

        # 스타일 이미지들
        for i, (img, style_name) in enumerate(zip(resized_images[1:], style_names)):
            x_offset = target_width * (i + 1)
            comparison[30:30+target_height, x_offset:x_offset+target_width] = img
            cv2.putText(comparison, style_name, (x_offset + 10, 20),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

        cv2.imwrite("test_output/text_style_comparison.jpg", comparison)
        self.logger.log("스타일 비교 이미지 저장 완료", level=logging.INFO)


class TextRenderingModuleOptimized(TextRenderingModule):
    """TextRenderingModule 메모리 최적화 버전.
    - 오버레이 버퍼를 (small/medium/large) 3종만 캐시하여 재사용
    - 필요한 경우(더 큰 텍스트 영역)에는 1회성 버퍼를 생성 후 즉시 폐기
    - 기존 public API(render_text, render_with_style 등)은 그대로 유지
    """

    def __init__(self, logger, font_path: Optional[str] = None):
        super().__init__(logger, font_path)
        # 범용 재사용 오버레이 캐시 (key: size label -> ndarray)
        self._fixed_buffers: Dict[str, np.ndarray] = {
            'small': None,   # 256x256
            'medium': None,  # 512x512
            'large': None,   # 1024x1024
        }

    # ────────────────────────────────────────────────
    # Internal: overlay buffer selection / reuse
    # ────────────────────────────────────────────────
    def _get_reusable_overlay(self, w: int, h: int) -> Tuple[np.ndarray, bool]:
        """주어진 크기를 수용할 수 있는 재사용 버퍼와 재사용 여부 반환"""
        # 버퍼 캐싱을 제거하고 항상 새 C-contiguous 배열을 반환
        return np.zeros((h, w, 4), dtype=np.uint8, order="C"), False

    # ────────────────────────────────────────────────
    # override render_text
    # ────────────────────────────────────────────────
    def render_text(
        self,
        image: np.ndarray,
        ocr_results: List[Dict[str, Any]],
        translated_texts: List[str],
        font_path: Optional[str] = None,
        style_name: Optional[str] = None,
        debug: bool = False,
    ) -> np.ndarray:
        # 가독성을 위해 기본 구현을 그대로 사용
        if style_name:
            return super().render_with_style(
                image, ocr_results, translated_texts, style_name=style_name
            )
        return super().render_text(
            image, ocr_results, translated_texts, font_path=font_path
        )

    # ────────────────────────────────────────────────
    # alpha blend util (BGR target, RGBA src)
    # ────────────────────────────────────────────────
    def _alpha_blend(self, target_bgr: np.ndarray, src_rgba: np.ndarray, top_left: Tuple[int, int]):
        x0, y0 = top_left
        h, w = src_rgba.shape[:2]
        # 클리핑
        if x0 >= target_bgr.shape[1] or y0 >= target_bgr.shape[0]:
            return
        x1 = min(x0 + w, target_bgr.shape[1])
        y1 = min(y0 + h, target_bgr.shape[0])
        w = x1 - x0
        h = y1 - y0
        if w <= 0 or h <= 0:
            return

        roi = target_bgr[y0:y1, x0:x1]
        src = src_rgba[:h, :w]

        # 보장: contiguous & float32 계산 → uint8 캐스팅
        if not src.flags['C_CONTIGUOUS']:
            src = np.ascontiguousarray(src)

        alpha = (src[:, :, 3].astype(np.float32) / 255.0)[..., None]  # (h,w,1)
        if np.all(alpha == 0):
            return

        src_bgr = src[:, :, :3][:, :, ::-1].astype(np.float32)   # RGBA ➜ BGR
        roi_bgr = roi.astype(np.float32)

        blended = roi_bgr * (1.0 - alpha) + src_bgr * alpha
        roi[:, :, :] = blended.astype(np.uint8)