이미지 경로 변경 및 텍스트 렌더링 모듈 개선. ROI 영역 설정 추가 및 WCAG 대비 비율 계산 기능 추가. 기존 코드 정리 및 주석 보강.

2025-08-12 16:24:04 +09:00 · 2025-08-12 16:24:04 +09:00 · 1b72426779
parent 29a32c4ef6
commit 1b72426779
5 changed files with 1636 additions and 1122 deletions
--- a/test1/translated_result.png
+++ b/test1/translated_result.png
--- a/test1/worker_test.py
+++ b/test1/worker_test.py
@ -16,7 +16,7 @@ import requests


 API_ROOT = "http://localhost:7890"          # 메인 서버 주소
-IMAGE_PATH = pathlib.Path("5.jpg")
+IMAGE_PATH = pathlib.Path("6.jpg")
 TIMEOUT = 120                               # 초

 unwanted_texts = {
@ -43,6 +43,7 @@ toggle_states.update({
    "blend_mode": "simple",              # 단순 블렌딩
    "performance_mode": True,      # 빠른 경로 사용
    "max_image_size": 1280,        # 더 작은 크기 제한
+    "roi_area_high": 0.0  # 기본값: 0.60 → 0.0으로 변경 # 풀프레임 인페인팅 강제

 })

--- a/worker/text_rendering_module.py
+++ b/worker/text_rendering_module.py
--- a/worker/text_rendering_module.py.ori
+++ b/worker/text_rendering_module.py.ori
@ -0,0 +1,496 @@
+# -*- coding: utf-8 -*-
+"""
+텍스트 렌더링 모듈 - 인페인팅된 이미지에 번역된 텍스트를 자연스럽게 렌더링 (라이브러리화)
+- /app/worker/fonts/ 내 폰트를 font_map으로 관리
+- 외부에서 render_text 호출 시 font_number로 폰트 선택
+- 기본 폰트는 3번(NanumSquareRoundR.ttf)로 설정
+"""
+
+import os
+import math
+import logging
+from typing import List, Dict, Any, Tuple, Optional
+
+import cv2
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+
+
+class TextRenderingModule:
+    def __init__(self, logger, font_path: Optional[str] = None):
+        """
+        Initialise defaults, discover the bundled fonts and pick the base font.
+
+        Args:
+            logger: Object exposing ``log(msg, level=...)``.
+            font_path (Optional[str]): Explicit default font path. When it is
+                falsy, the path chosen by ``_setup_default_fonts`` is used.
+        """
+        self.logger = logger
+        self.font_cache: Dict[str, ImageFont.FreeTypeFont] = {}
+        self.default_font_size = 20
+
+        # Must be assigned before _setup_default_fonts(), which reads it.
+        self.default_font_number = 3
+
+        # Builds self.font_map and returns the preferred usable font path.
+        discovered_path = self._setup_default_fonts()
+
+        # A caller-supplied path wins over the discovered one.
+        self.font_path = font_path if font_path else discovered_path
+
+        startup_messages = (
+            "텍스트 렌더링 모듈 초기화 완료",
+            f"기본 폰트 경로: {self.font_path}",
+        )
+        for message in startup_messages:
+            self.logger.log(message, level=logging.INFO)
+
+    # ---------------------------------------------------------------------
+    # 내부 설정: /app/worker/fonts/ 내 폰트들을 번호로 매핑
+    # ---------------------------------------------------------------------
+    def _setup_default_fonts(self) -> Optional[str]:
+        """
+        Build ``self.font_map`` (font number -> file path) from /app/worker/fonts/.
+
+        Numbering:
+        - 1: HakgyoansimDunggeunmisoTTFB.ttf
+        - 2: NanumBarunGothic.ttf
+        - 3: NanumSquareRoundR.ttf  (default)
+        - 4: gamtanload.ttf
+        - 5: Cafe24Ohsquare-v2.0.ttf
+
+        Entries whose file does not exist are left out and a warning is logged
+        for each of them.
+
+        Returns:
+            The path for ``self.default_font_number`` when that font exists,
+            otherwise the first remaining font path, otherwise None.
+        """
+        font_dir = "/app/worker/fonts/"
+        bundled_fonts = (
+            (1, "HakgyoansimDunggeunmisoTTFB.ttf"),
+            (2, "NanumBarunGothic.ttf"),
+            (3, "NanumSquareRoundR.ttf"),
+            (4, "gamtanload.ttf"),
+            (5, "Cafe24Ohsquare-v2.0.ttf"),
+        )
+
+        # Keep only fonts that are present on disk, in numbering order.
+        self.font_map: Dict[int, str] = {}
+        for number, file_name in bundled_fonts:
+            candidate = os.path.join(font_dir, file_name)
+            if os.path.exists(candidate):
+                self.font_map[number] = candidate
+                continue
+            self.logger.log(f"[경고] 폰트 파일 없음: {candidate} -> font_map에서 제외", level=logging.WARNING)
+
+        # Preferred default first.
+        preferred = self.font_map.get(self.default_font_number)
+        if preferred is not None:
+            return preferred
+
+        # Nothing usable at all: callers fall back to the PIL built-in font.
+        if not self.font_map:
+            self.logger.log(
+                "[오류] 사용 가능한 폰트가 없습니다. PIL 기본 폰트를 사용합니다.",
+                level=logging.ERROR,
+            )
+            return None
+
+        # Otherwise substitute the lowest-numbered font that exists.
+        first_path = next(iter(self.font_map.values()))
+        self.logger.log(
+            f"[주의] 기본 3번 폰트가 없어 {first_path}로 대체 사용", level=logging.WARNING
+        )
+        return first_path
+
+    # ---------------------------------------------------------------------
+    # 헬퍼: 번호로 폰트 경로 가져오기
+    # ---------------------------------------------------------------------
+    def _get_font_path_by_number(self, font_number: Optional[int]) -> Optional[str]:
+        """
+        Resolve a font number to a file path via ``self.font_map``.
+
+        - ``None`` selects the default path (``self.font_path``) silently.
+        - An unknown number logs a warning and also yields the default path.
+        - The default path itself may be None when no font is available.
+        """
+        if font_number is None:
+            return self.font_path
+
+        mapped_path = self.font_map.get(font_number)
+        if mapped_path is not None:
+            return mapped_path
+
+        self.logger.log(
+            f"[경고] 알 수 없는 font_number={font_number}. 기본 폰트를 사용합니다.",
+            level=logging.WARNING,
+        )
+        return self.font_path
+
+    # ---------------------------------------------------------------------
+    # 폰트 로딩 / 사이즈 측정
+    # ---------------------------------------------------------------------
+    def get_font(self, size: int, font_path: Optional[str] = None) -> ImageFont.FreeTypeFont:
+        """
+        Return a font object for ``font_path`` at ``size``, cached per (path, size).
+
+        Args:
+            size: Font size passed to ``ImageFont.truetype``.
+            font_path: Path of a TrueType font file, or None for the PIL
+                built-in font.
+
+        Returns:
+            The loaded font. The PIL built-in font (``ImageFont.load_default()``)
+            is returned instead when ``font_path`` is None, when the file does
+            not exist, or when loading it fails; the fallback is cached under
+            the same key so the warning/error is logged once per key.
+        """
+        if font_path is None:
+            cache_key = f"__PIL_DEFAULT__{size}"
+        else:
+            cache_key = f"{font_path}_{size}"
+
+        if cache_key in self.font_cache:
+            return self.font_cache[cache_key]
+
+        font = None
+        if font_path is not None:
+            if os.path.exists(font_path):
+                try:
+                    font = ImageFont.truetype(font_path, size)
+                except Exception as e:
+                    # Deliberately broad: rendering is best-effort and must
+                    # degrade to the built-in font rather than abort.
+                    self.logger.log(f"폰트 로드 오류: {e}. PIL 기본 폰트 사용", level=logging.ERROR)
+            else:
+                self.logger.log(
+                    f"[경고] 지정 경로의 폰트가 존재하지 않음: {font_path}. PIL 기본 폰트 사용",
+                    level=logging.WARNING,
+                )
+
+        if font is None:
+            # Single fallback point. Retrying load_default() inside an except
+            # handler (as before) could never succeed where the first call failed.
+            font = ImageFont.load_default()
+
+        self.font_cache[cache_key] = font
+        return font
+
+    def estimate_text_size(self, text: str, font_size: int, font_path: Optional[str] = None) -> Tuple[int, int]:
+        """
+        Measure the rendered (width, height) of ``text`` for the given font.
+
+        Uses ``font.getbbox`` and falls back to ``font.getsize`` when the font
+        object raises AttributeError (older Pillow releases).
+        """
+        font = self.get_font(font_size, font_path)
+        try:
+            box = font.getbbox(text)
+            measured = (box[2] - box[0], box[3] - box[1])
+        except AttributeError:
+            # getbbox is not available on this Pillow version.
+            legacy_width, legacy_height = font.getsize(text)
+            measured = (legacy_width, legacy_height)
+        return measured
+
+    def calculate_optimal_font_size(
+        self,
+        text: str,
+        target_width: int,
+        target_height: int,
+        min_size: int = 8,
+        max_size: int = 100,
+        font_path: Optional[str] = None,
+    ) -> int:
+        """
+        Find the largest font size whose measured text fits the target box.
+
+        Sizes are tried upward from ``min_size``; the scan stops at the first
+        size that no longer fits or after ``max_size``. ``min_size`` is
+        returned even when the text does not fit at that size.
+        """
+        chosen = min_size
+        candidate = min_size
+        while candidate <= max_size:
+            measured_w, measured_h = self.estimate_text_size(text, candidate, font_path)
+            if measured_w > target_width or measured_h > target_height:
+                break
+            chosen = candidate
+            candidate += 1
+        return chosen
+
+    # ---------------------------------------------------------------------
+    # 색상 관련
+    # ---------------------------------------------------------------------
+    def _estimate_background_color(self, image: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> Tuple[int, int, int]:
+        """
+        Average the pixels of the box (x1, y1)-(x2, y2), grown by a 5 px margin,
+        and return the result as an RGB tuple.
+
+        Args:
+            image: Image array indexed as [row, col, channel] with channels in
+                B, G, R order (the first three channels are used).
+            x1, y1, x2, y2: Box corners in pixel coordinates; they may lie
+                partly or fully outside the image.
+
+        Returns:
+            (R, G, B) integer mean of the clipped region. When the clipped
+            region is empty (box entirely outside the image), white
+            (255, 255, 255) is returned so that the contrasting text colour
+            becomes black instead of the call failing on a NaN mean.
+        """
+        margin = 5
+        img_h, img_w = image.shape[:2]
+
+        # Clamp both ends into [0, size]; an unclamped negative upper bound
+        # would otherwise be interpreted as a from-the-end slice index.
+        top = min(max(0, y1 - margin), img_h)
+        bottom = max(min(img_h, y2 + margin), 0)
+        left = min(max(0, x1 - margin), img_w)
+        right = max(min(img_w, x2 + margin), 0)
+
+        region = image[top:bottom, left:right]
+        if region.size == 0:
+            return (255, 255, 255)
+
+        mean_color = np.mean(region, axis=(0, 1))  # per-channel mean, B, G, R order
+        # Reorder to RGB for PIL.
+        return (int(mean_color[2]), int(mean_color[1]), int(mean_color[0]))
+
+    def _get_contrasting_color(self, bg_color: Tuple[int, int, int]) -> Tuple[int, int, int]:
+        """
+        Choose black or white text (RGB) for the given RGB background.
+
+        The background's weighted brightness (0.299 R + 0.587 G + 0.114 B) is
+        compared against 128: brighter backgrounds get black text, all others
+        get white text.
+        """
+        black = (0, 0, 0)
+        white = (255, 255, 255)
+        red, green, blue = bg_color[0], bg_color[1], bg_color[2]
+        luma = (red * 0.299 + green * 0.587 + blue * 0.114)
+        return black if luma > 128 else white
+
+    # ---------------------------------------------------------------------
+    # 메인 렌더링
+    # ---------------------------------------------------------------------
+    def render_text(
+        self,
+        image: np.ndarray,
+        ocr_results: List[Dict],
+        translated_texts: List[str],
+        font_number: Optional[int] = None,
+    ) -> np.ndarray:
+        """
+        Draw each translated string centred on the bounding box of its OCR polygon.
+
+        For every (polygon, text) pair the axis-aligned bounding box of the
+        polygon is computed, the largest fitting font size is searched, the
+        text colour is chosen to contrast with the mean colour around the box,
+        and the text is drawn via ``render_text_on_image``.
+
+        Args:
+            image: Source BGR image (numpy.ndarray); it is not modified.
+            ocr_results: [{'polygon': [(x, y), ...]}, ...]. The first two
+                points are taken as the text's top edge (left to right) when
+                deriving the rotation.
+            translated_texts: Text for each region, paired positionally with
+                ``ocr_results``. Extra items on either side are ignored (a
+                warning is logged).
+            font_number: Font number to use (None selects the default font).
+
+        Returns:
+            A new image with the texts rendered.
+        """
+        # Diagnostics go through the logger at DEBUG level instead of stdout.
+        self.logger.log(f"render_text in translated_texts: {translated_texts}", level=logging.DEBUG)
+        self.logger.log(f"render_text in ocr_results : {ocr_results}", level=logging.DEBUG)
+        self.logger.log(f"render_text in font_number: {font_number}", level=logging.DEBUG)
+        result_image = image.copy()
+
+        if len(ocr_results) != len(translated_texts):
+            # zip() below truncates silently; make the mismatch visible.
+            self.logger.log(
+                f"[경고] OCR 결과 수({len(ocr_results)})와 번역 텍스트 수({len(translated_texts)})가 "
+                "일치하지 않습니다. 짧은 쪽 기준으로 렌더링합니다.",
+                level=logging.WARNING,
+            )
+
+        # Resolve the font once for all regions.
+        selected_font_path = self._get_font_path_by_number(font_number)
+
+        for ocr_result, translated_text in zip(ocr_results, translated_texts):
+            if not translated_text:
+                # Nothing to draw for an empty/missing translation.
+                continue
+
+            polygon_array = np.asarray(ocr_result['polygon'])
+            if polygon_array.ndim != 2 or polygon_array.shape[0] == 0 or polygon_array.shape[1] < 2:
+                self.logger.log(
+                    f"[경고] 유효하지 않은 polygon을 건너뜁니다: {ocr_result['polygon']}",
+                    level=logging.WARNING,
+                )
+                continue
+
+            x_coords = polygon_array[:, 0]
+            y_coords = polygon_array[:, 1]
+
+            x_min, x_max = int(np.min(x_coords)), int(np.max(x_coords))
+            y_min, y_max = int(np.min(y_coords)), int(np.max(y_coords))
+            width = max(1, x_max - x_min)
+            height = max(1, y_max - y_min)
+
+            optimal_font_size = self.calculate_optimal_font_size(
+                translated_text, width, height, font_path=selected_font_path
+            )
+
+            text_width, text_height = self.estimate_text_size(
+                translated_text, optimal_font_size, selected_font_path
+            )
+
+            # Top-left corner that centres the measured text box in the bbox.
+            center_x = (x_min + x_max) // 2
+            center_y = (y_min + y_max) // 2
+            text_x = center_x - text_width // 2
+            text_y = center_y - text_height // 2
+
+            angle = 0.0
+            if len(polygon_array) >= 2:
+                dx = float(polygon_array[1][0] - polygon_array[0][0])
+                dy = float(polygon_array[1][1] - polygon_array[0][1])
+                # Image rows grow downward, so atan2(dy, dx) is a clockwise
+                # angle on screen, while PIL's Image.rotate (used by
+                # render_text_on_image) turns counter-clockwise for positive
+                # values. Negate so the text follows the polygon's slope.
+                angle = -math.degrees(math.atan2(dy, dx))
+
+            # Sample the colour from the untouched input, not the partly
+            # rendered result.
+            bg_color = self._estimate_background_color(image, x_min, y_min, x_max, y_max)
+            text_color = self._get_contrasting_color(bg_color)
+
+            result_image = self.render_text_on_image(
+                result_image,
+                translated_text,
+                (int(text_x), int(text_y)),
+                font_size=optimal_font_size,
+                font_path=selected_font_path,
+                text_color=text_color,
+                background_color=None,
+                angle=angle,
+            )
+
+        return result_image
+
+    def render_text_on_image(
+        self,
+        image: np.ndarray,
+        text: str,
+        position: Tuple[int, int],
+        font_size: Optional[int] = None,
+        font_path: Optional[str] = None,
+        text_color: Tuple[int, int, int] = (0, 0, 0),
+        background_color: Optional[Tuple[int, int, int]] = None,
+        angle: float = 0.0,
+    ) -> np.ndarray:
+        """
+        Draw a single text string onto a copy of ``image``.
+
+        Only a padded region around the text is converted to PIL and back, so
+        the colour conversion cost does not depend on the full image size.
+
+        Args:
+            image: BGR image; it is not modified.
+            text: String to draw.
+            position: (x, y) in image pixels, used as the PIL text origin
+                (top-left of the text box).
+            font_size: Font size; ``self.default_font_size`` when None.
+            font_path: Font file path; None selects the PIL built-in font.
+            text_color: RGB text colour.
+            background_color: Optional RGB fill for a box drawn behind the
+                (unrotated) text box, 2 px larger on every side.
+            angle: Rotation in degrees, passed to PIL ``Image.rotate``
+                (counter-clockwise). 0 draws the text directly.
+
+        Returns:
+            A new BGR image. When the text region lies completely outside the
+            image, an unchanged copy is returned.
+        """
+        if font_size is None:
+            font_size = self.default_font_size
+
+        result_image = image.copy()
+        img_h, img_w = image.shape[:2]
+        font = self.get_font(font_size, font_path)
+        text_width, text_height = self.estimate_text_size(text, font_size, font_path)
+        pos_x, pos_y = int(position[0]), int(position[1])
+
+        # Extent of everything that will be drawn, in full-image coordinates.
+        box_x1, box_y1 = pos_x, pos_y
+        box_x2, box_y2 = pos_x + text_width, pos_y + text_height
+
+        rotated_text = None
+        paste_x = paste_y = 0
+        if angle != 0:
+            # PIL draws from the text origin and glyphs may sit below the
+            # measured box by the font's top bearing, so scale the transparent
+            # margin with the font size instead of using a fixed 5 px.
+            margin = max(5, font_size // 2)
+            text_image = Image.new(
+                'RGBA',
+                (text_width + 2 * margin, text_height + 2 * margin),
+                (255, 255, 255, 0),
+            )
+            text_draw = ImageDraw.Draw(text_image)
+            text_draw.text((margin, margin), text, font=font, fill=tuple(text_color) + (255,))
+            rotated_text = text_image.rotate(angle, expand=True)
+
+            # rotate(expand=True) keeps the layer's centre, which coincides
+            # with the centre of the text box. Paste so that this centre stays
+            # where the unrotated text box would have been centred.
+            paste_x = pos_x + (text_width - rotated_text.width) // 2
+            paste_y = pos_y + (text_height - rotated_text.height) // 2
+            box_x1 = min(box_x1, paste_x)
+            box_y1 = min(box_y1, paste_y)
+            box_x2 = max(box_x2, paste_x + rotated_text.width)
+            box_y2 = max(box_y2, paste_y + rotated_text.height)
+
+        # Region of interest: drawn extent plus padding, clipped to the image.
+        padding = 20
+        x1 = max(0, box_x1 - padding)
+        y1 = max(0, box_y1 - padding)
+        x2 = min(img_w, box_x2 + padding)
+        y2 = min(img_h, box_y2 + padding)
+        if x1 >= x2 or y1 >= y2:
+            # Entirely off-image: an empty ROI cannot be colour-converted.
+            return result_image
+
+        roi = image[y1:y2, x1:x2]
+        pil_roi = Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
+        draw = ImageDraw.Draw(pil_roi)
+
+        # Text origin relative to the ROI.
+        rel_pos = (pos_x - x1, pos_y - y1)
+
+        # Optional background box.
+        if background_color is not None:
+            bg_x1 = rel_pos[0] - 2
+            bg_y1 = rel_pos[1] - 2
+            bg_x2 = rel_pos[0] + text_width + 2
+            bg_y2 = rel_pos[1] + text_height + 2
+            draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill=background_color)
+
+        if rotated_text is not None:
+            # The layer doubles as its own alpha mask.
+            pil_roi.paste(rotated_text, (paste_x - x1, paste_y - y1), rotated_text)
+        else:
+            draw.text(rel_pos, text, font=font, fill=text_color)
+
+        # Convert just the ROI back to BGR and write it into the copy.
+        result_image[y1:y2, x1:x2] = cv2.cvtColor(np.array(pil_roi), cv2.COLOR_RGB2BGR)
+        return result_image
+
+    # ---------------------------------------------------------------------
+    # 스타일 관련 (선택 사용)
+    # ---------------------------------------------------------------------
+    def create_text_styles(self) -> Dict[str, Dict[str, Any]]:
+        """
+        Return the table of named text styles.
+
+        Each style is a dict with the keys ``color``, ``bg_color``, ``outline``,
+        ``outline_color`` and ``outline_width``; colours are 3-tuples or None.
+        A fresh table is built on every call.
+        """
+        def _style(color, bg_color, outline, outline_color, outline_width):
+            # Assemble one style entry with a fixed key order.
+            return {
+                'color': color,
+                'bg_color': bg_color,
+                'outline': outline,
+                'outline_color': outline_color,
+                'outline_width': outline_width,
+            }
+
+        black = (0, 0, 0)
+        white = (255, 255, 255)
+        grey = (128, 128, 128)
+
+        return {
+            'default': _style(black, None, True, white, 1),
+            'bold': _style(black, white, True, grey, 2),
+            'highlight': _style(white, (255, 0, 0), False, None, 0),
+            'subtle': _style(grey, None, True, white, 1),
+        }
+
+    def render_with_style(
+        self,
+        image: np.ndarray,
+        ocr_results: List[Dict],
+        translated_texts: List[str],
+        style_name: str = 'default',
+        font_number: Optional[int] = None,
+    ) -> np.ndarray:
+        """
+        Render text after validating ``style_name`` against the style table.
+
+        An unknown style name logs a warning and is replaced by 'default'.
+        The style's colour/background/outline values are not applied yet: the
+        call currently delegates to ``render_text`` and returns its result
+        unchanged. Style post-processing would be added after that call.
+        """
+        known_styles = self.create_text_styles()
+
+        if style_name not in known_styles:
+            self.logger.log(f"[경고] 알 수 없는 스타일: {style_name}. 'default'로 대체", level=logging.WARNING)
+            style_name = 'default'
+
+        # NOTE: plain rendering only; the selected style is not consulted.
+        return self.render_text(
+            image=image,
+            ocr_results=ocr_results,
+            translated_texts=translated_texts,
+            font_number=font_number,
+        )
+
+    # ---------------------------------------------------------------------
+    # 텍스트 길이/크기 보정 유틸
+    # ---------------------------------------------------------------------
+    def adjust_text_for_space(
+        self,
+        text: str,
+        max_width: int,
+        max_height: int,
+        font_size: int,
+    ) -> Tuple[str, int]:
+        """
+        Shorten/wrap ``text`` and shrink the font size to fit ``max_width``.
+
+        Simple heuristic:
+        - Text longer than 20 characters is split into two lines at the middle
+          word, or, when it has no spaces, cut to 15 characters plus '...'.
+        - The font size is then reduced in steps of 2 (never below 8, unless
+          ``font_size`` itself is smaller) until the estimated width of the
+          longest line, ``chars * size * 0.6``, fits ``max_width``.
+
+        Args:
+            text: Text to fit.
+            max_width: Available width in pixels.
+            max_height: Available height in pixels. Accepted for interface
+                compatibility; the heuristic does not consult it.
+            font_size: Starting font size.
+
+        Returns:
+            (adjusted_text, adjusted_font_size)
+        """
+        if len(text) > 20:
+            words = text.split(' ')
+            if len(words) > 1:
+                mid = len(words) // 2
+                text = ' '.join(words[:mid]) + '\n' + ' '.join(words[mid:])
+            else:
+                text = text[:15] + '...'
+
+        # Width is governed by the longest rendered line, not by the total
+        # character count (which also counts the inserted line break).
+        longest_line = max(len(line) for line in text.split('\n'))
+
+        min_font_size = 8
+        adjusted_font_size = font_size
+        while adjusted_font_size > min_font_size:
+            estimated_width = int(longest_line * adjusted_font_size * 0.6)
+            if estimated_width <= max_width:
+                break
+            # Clamp so an odd starting size cannot step past the minimum.
+            adjusted_font_size = max(min_font_size, adjusted_font_size - 2)
+
+        return text, adjusted_font_size
+
+    # ---------------------------------------------------------------------
+    # 테스트/디버그용 비교 이미지 저장
+    # ---------------------------------------------------------------------
+    def _create_style_comparison(self, images: List[np.ndarray], style_names: List[str]):
+        """
+        Save a side-by-side comparison strip of rendered styles (debug helper).
+
+        ``images[0]`` is labelled "Original"; each following image is paired
+        positionally with ``style_names`` and labelled with that name. Every
+        image is resized to 200 px width (height follows the first image's
+        aspect ratio) and placed under a 30 px caption band. The result is
+        written to ``test_output/text_style_comparison.jpg``.
+
+        Does nothing when ``images`` is empty or the first image has zero width.
+        """
+        if not images or images[0].shape[1] == 0:
+            return
+
+        # Common thumbnail size; keep at least 1 px so cv2.resize accepts it.
+        target_width = 200
+        target_height = max(1, int(images[0].shape[0] * target_width / images[0].shape[1]))
+
+        resized_images = [cv2.resize(img, (target_width, target_height)) for img in images]
+
+        # White canvas: caption band on top, thumbnails below.
+        num_images = len(resized_images)
+        comparison_width = target_width * num_images
+        comparison_height = target_height + 30
+
+        comparison = np.ones((comparison_height, comparison_width, 3), dtype=np.uint8) * 255
+
+        # Original image in the first slot.
+        comparison[30:30 + target_height, 0:target_width] = resized_images[0]
+        cv2.putText(comparison, "Original", (10, 20),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
+
+        # Styled renderings in the following slots.
+        for i, (img, style_name) in enumerate(zip(resized_images[1:], style_names)):
+            x_offset = target_width * (i + 1)
+            comparison[30:30 + target_height, x_offset:x_offset + target_width] = img
+            cv2.putText(comparison, style_name, (x_offset + 10, 20),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
+
+        os.makedirs("test_output", exist_ok=True)
+        out_path = "test_output/text_style_comparison.jpg"
+        # cv2.imwrite reports failure through its return value, not an exception.
+        if cv2.imwrite(out_path, comparison):
+            self.logger.log(f"스타일 비교 이미지 저장 완료: {out_path}", level=logging.INFO)
+        else:
+            self.logger.log(f"스타일 비교 이미지 저장 실패: {out_path}", level=logging.ERROR)
+
+
+# -------------------------------------------------------------------------
+# 사용 예시 (참고용)
+# -------------------------------------------------------------------------
+# logger = your_logger
+# tr = TextRenderingModule(logger)
+# result = tr.render_text(
+#     image=origin_img,
+#     ocr_results=[{'polygon': [(10,10),(110,10),(110,40),(10,40)]}],
+#     translated_texts=["예시 텍스트"],
+#     font_number=3  # 기본값 3번, 명시적으로 지정 가능
+# )
+# cv2.imwrite("result.jpg", result)
--- a/worker/text_rendering_module2.py
+++ b/worker/text_rendering_module2.py