diff --git a/test1/translated_result.png b/test1/translated_result.png index c04da4c..03152f5 100644 Binary files a/test1/translated_result.png and b/test1/translated_result.png differ diff --git a/test1/worker_test.py b/test1/worker_test.py index 724bf95..a665642 100644 --- a/test1/worker_test.py +++ b/test1/worker_test.py @@ -16,7 +16,7 @@ import requests API_ROOT = "http://localhost:7890" # 메인 서버 주소 -IMAGE_PATH = pathlib.Path("5.jpg") +IMAGE_PATH = pathlib.Path("6.jpg") TIMEOUT = 120 # 초 unwanted_texts = { @@ -43,6 +43,7 @@ toggle_states.update({ "blend_mode": "simple", # 단순 블렌딩 "performance_mode": True, # 빠른 경로 사용 "max_image_size": 1280, # 더 작은 크기 제한 + "roi_area_high": 0.0 # 기본값: 0.60 → 0.0으로 변경 # 풀프레임 인페인팅 강제 }) diff --git a/worker/text_rendering_module.py b/worker/text_rendering_module.py index 1a7436d..a4c8858 100644 --- a/worker/text_rendering_module.py +++ b/worker/text_rendering_module.py @@ -1,496 +1,513 @@ -# -*- coding: utf-8 -*- -""" -텍스트 렌더링 모듈 - 인페인팅된 이미지에 번역된 텍스트를 자연스럽게 렌더링 (라이브러리화) -- /app/worker/fonts/ 내 폰트를 font_map으로 관리 -- 외부에서 render_text 호출 시 font_number로 폰트 선택 -- 기본 폰트는 3번(NanumSquareRoundR.ttf)로 설정 -""" - -import os -import math -import logging -from typing import List, Dict, Any, Tuple, Optional - -import cv2 -import numpy as np -from PIL import Image, ImageDraw, ImageFont - - -class TextRenderingModule: - def __init__(self, logger, font_path: Optional[str] = None): - """ - Args: - logger: logger.log(msg, level=logging.INFO) 형태를 지원하는 로거 - font_path (Optional[str]): 외부에서 기본 폰트 경로를 강제 지정할 때 사용 (보통 None) - """ - self.logger = logger - self.default_font_size = 20 - self.font_cache: Dict[str, ImageFont.FreeTypeFont] = {} - - # 기본 폰트 번호 (요청사항: 3번) - self.default_font_number = 3 - - # /app/worker/fonts/ 내 폰트 맵 구성 및 유효성 검사 - default_path = self._setup_default_fonts() - - # 외부에서 font_path가 들어오면 우선 사용, 없으면 디폴트 선택 - self.font_path = font_path or default_path - - self.logger.log("텍스트 렌더링 모듈 초기화 완료", level=logging.INFO) - self.logger.log(f"기본 폰트 경로: {self.font_path}", level=logging.INFO) - - # --------------------------------------------------------------------- - # 내부 설정: /app/worker/fonts/ 내 폰트들을 번호로 매핑 - # --------------------------------------------------------------------- - def _setup_default_fonts(self) -> Optional[str]: - """ - /app/worker/fonts/ 경로의 폰트 파일을 번호로 매핑한다. - - 1: HakgyoansimDunggeunmisoTTFB.ttf - - 2: NanumBarunGothic.ttf - - 3: NanumSquareRoundR.ttf (기본) - - 4: gamtanload.ttf - - 5: Cafe24Ohsquare-v2.0.ttf - - Returns: - 기본으로 사용할 폰트 경로 (가능하면 3번, 없으면 첫 번째 유효한 폰트, 모두 없으면 None) - """ - base_path = "/app/worker/fonts/" - - self.font_map: Dict[int, str] = { - 1: os.path.join(base_path, "HakgyoansimDunggeunmisoTTFB.ttf"), - 2: os.path.join(base_path, "NanumBarunGothic.ttf"), - 3: os.path.join(base_path, "NanumSquareRoundR.ttf"), - 4: os.path.join(base_path, "gamtanload.ttf"), - 5: os.path.join(base_path, "Cafe24Ohsquare-v2.0.ttf"), - } - - # 실제 존재하는 폰트만 남김 - for key in list(self.font_map.keys()): - path = self.font_map[key] - if not os.path.exists(path): - self.logger.log(f"[경고] 폰트 파일 없음: {path} -> font_map에서 제외", level=logging.WARNING) - del self.font_map[key] - - # 기본(3번) 우선, 없으면 첫 번째 유효 폰트, 전무하면 None - if self.default_font_number in self.font_map: - return self.font_map[self.default_font_number] - elif len(self.font_map) > 0: - first_path = next(iter(self.font_map.values())) - self.logger.log( - f"[주의] 기본 3번 폰트가 없어 {first_path}로 대체 사용", level=logging.WARNING - ) - return first_path - else: - self.logger.log( - "[오류] 사용 가능한 폰트가 없습니다. PIL 기본 폰트를 사용합니다.", - level=logging.ERROR, - ) - return None - - # --------------------------------------------------------------------- - # 헬퍼: 번호로 폰트 경로 가져오기 - # --------------------------------------------------------------------- - def _get_font_path_by_number(self, font_number: Optional[int]) -> Optional[str]: - """ - font_number로 self.font_map에서 경로를 선택. - - 유효하지 않으면 self.font_path(기본 경로) 반환 - - 둘 다 없으면 None - """ - if font_number is not None: - if font_number in self.font_map: - return self.font_map[font_number] - else: - self.logger.log( - f"[경고] 알 수 없는 font_number={font_number}. 기본 폰트를 사용합니다.", - level=logging.WARNING, - ) - return self.font_path - - # --------------------------------------------------------------------- - # 폰트 로딩 / 사이즈 측정 - # --------------------------------------------------------------------- - def get_font(self, size: int, font_path: Optional[str] = None) -> ImageFont.FreeTypeFont: - """ - 폰트를 캐시하여 로딩 비용을 줄임 - - font_path가 None이면 PIL 기본 폰트 사용 - """ - if font_path is None: - cache_key = f"__PIL_DEFAULT__{size}" - if cache_key not in self.font_cache: - try: - font = ImageFont.load_default() - self.font_cache[cache_key] = font - except Exception as e: - self.logger.log(f"폰트 로드 오류(PIL 기본 폰트): {e}", level=logging.ERROR) - font = ImageFont.load_default() - self.font_cache[cache_key] = font - return self.font_cache[cache_key] - - cache_key = f"{font_path}_{size}" - if cache_key not in self.font_cache: - try: - if os.path.exists(font_path): - font = ImageFont.truetype(font_path, size) - else: - self.logger.log( - f"[경고] 지정 경로의 폰트가 존재하지 않음: {font_path}. PIL 기본 폰트 사용", - level=logging.WARNING, - ) - font = ImageFont.load_default() - self.font_cache[cache_key] = font - except Exception as e: - self.logger.log(f"폰트 로드 오류: {e}. PIL 기본 폰트 사용", level=logging.ERROR) - font = ImageFont.load_default() - self.font_cache[cache_key] = font - return self.font_cache[cache_key] - - def estimate_text_size(self, text: str, font_size: int, font_path: Optional[str] = None) -> Tuple[int, int]: - """ - 주어진 폰트로 텍스트의 렌더링 크기를 추정 - """ - font = self.get_font(font_size, font_path) - try: - bbox = font.getbbox(text) - width = bbox[2] - bbox[0] - height = bbox[3] - bbox[1] - except AttributeError: - # Pillow 버전에 따라 getbbox가 없을 수 있음 - width, height = font.getsize(text) - return width, height - - def calculate_optimal_font_size( - self, - text: str, - target_width: int, - target_height: int, - min_size: int = 8, - max_size: int = 100, - font_path: Optional[str] = None, - ) -> int: - """ - 주어진 영역(target_width, target_height)에 들어가는 최대 폰트 크기 탐색 - """ - best_size = min_size - for size in range(min_size, max_size + 1): - width, height = self.estimate_text_size(text, size, font_path) - if width <= target_width and height <= target_height: - best_size = size - else: - break - return best_size - - # --------------------------------------------------------------------- - # 색상 관련 - # --------------------------------------------------------------------- - def _estimate_background_color(self, image: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> Tuple[int, int, int]: - """ - 텍스트가 들어갈 영역 주변의 평균 색상(BGR)을 구한 뒤 RGB로 반환 - """ - margin = 5 - y1_exp = max(0, y1 - margin) - y2_exp = min(image.shape[0], y2 + margin) - x1_exp = max(0, x1 - margin) - x2_exp = min(image.shape[1], x2 + margin) - region = image[y1_exp:y2_exp, x1_exp:x2_exp] - mean_color = np.mean(region, axis=(0, 1)) # BGR 평균 - # RGB 튜플로 변환 - return (int(mean_color[2]), int(mean_color[1]), int(mean_color[0])) - - def _get_contrasting_color(self, bg_color: Tuple[int, int, int]) -> Tuple[int, int, int]: - """ - 배경색과 대비되는 텍스트 색상(RGB) 선택 (단순 라이트/다크 기준) - """ - brightness = (bg_color[0] * 0.299 + bg_color[1] * 0.587 + bg_color[2] * 0.114) - if brightness > 128: - return (0, 0, 0) # 밝으면 검정 - else: - return (255, 255, 255) # 어두우면 흰색 - - # --------------------------------------------------------------------- - # 메인 렌더링 - # --------------------------------------------------------------------- - def render_text( - self, - image: np.ndarray, - ocr_results: List[Dict], - translated_texts: List[str], - font_number: Optional[int] = None, - ) -> np.ndarray: - """ - OCR 폴리곤과 번역 텍스트 리스트를 받아 지정된 폰트로 중심 정렬 렌더링 - - Args: - image: 원본 BGR 이미지 (numpy.ndarray) - ocr_results: [{'polygon': [(x,y), ...]}, ...] - translated_texts: 각 영역에 대응하는 번역 텍스트 리스트 - font_number: 사용할 폰트 번호 (None이면 기본 폰트) - """ - print(f"render_text in translated_texts: {translated_texts}") - print(f"render_text in ocr_results : {ocr_results}") - print(f"render_text in font_number: {font_number}") - result_image = image.copy() - - # 폰트 선택 - selected_font_path = self._get_font_path_by_number(font_number) - - for i, (ocr_result, translated_text) in enumerate(zip(ocr_results, translated_texts)): - polygon = ocr_result['polygon'] - polygon_array = np.array(polygon) - x_coords = polygon_array[:, 0] - y_coords = polygon_array[:, 1] - - x_min, x_max = int(np.min(x_coords)), int(np.max(x_coords)) - y_min, y_max = int(np.min(y_coords)), int(np.max(y_coords)) - width = max(1, x_max - x_min) - height = max(1, y_max - y_min) - - optimal_font_size = self.calculate_optimal_font_size( - translated_text, width, height, font_path=selected_font_path - ) - - text_width, text_height = self.estimate_text_size( - translated_text, optimal_font_size, selected_font_path - ) - - center_x = (x_min + x_max) // 2 - center_y = (y_min + y_max) // 2 - text_x = center_x - text_width // 2 - text_y = center_y - text_height // 2 - - angle = 0.0 - if len(polygon_array) >= 2: - dx = float(polygon_array[1][0] - polygon_array[0][0]) - dy = float(polygon_array[1][1] - polygon_array[0][1]) - angle = math.degrees(math.atan2(dy, dx)) - - bg_color = self._estimate_background_color(image, x_min, y_min, x_max, y_max) - text_color = self._get_contrasting_color(bg_color) - - result_image = self.render_text_on_image( - result_image, - translated_text, - (int(text_x), int(text_y)), - font_size=optimal_font_size, - font_path=selected_font_path, - text_color=text_color, - background_color=None, - angle=angle, - ) - - return result_image - - def render_text_on_image( - self, - image: np.ndarray, - text: str, - position: Tuple[int, int], - font_size: Optional[int] = None, - font_path: Optional[str] = None, - text_color: Tuple[int, int, int] = (0, 0, 0), - background_color: Optional[Tuple[int, int, int]] = None, - angle: float = 0.0, - ) -> np.ndarray: - """ - 단일 텍스트를 지정 좌표에 그린다. (RGB 색상 인자 사용) - 🔥 최적화: 메모리 효율성 개선 - """ - if font_size is None: - font_size = self.default_font_size - - # 🔥 최적화: PIL 변환 최소화 - 필요한 영역만 처리 - h, w = image.shape[:2] - text_width, text_height = self.estimate_text_size(text, font_size, font_path) - - # 텍스트 영역만 PIL로 처리하여 메모리 절약 - padding = 20 - x1 = max(0, position[0] - padding) - y1 = max(0, position[1] - padding) - x2 = min(w, position[0] + text_width + padding) - y2 = min(h, position[1] + text_height + padding) - - # 작은 영역만 PIL로 변환 - roi = image[y1:y2, x1:x2] - pil_roi = Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)) - draw = ImageDraw.Draw(pil_roi) - font = self.get_font(font_size, font_path) - - # 상대 좌표로 조정 - rel_pos = (position[0] - x1, position[1] - y1) - - # 배경 박스(옵션) - if background_color is not None: - bg_x1 = rel_pos[0] - 2 - bg_y1 = rel_pos[1] - 2 - bg_x2 = rel_pos[0] + text_width + 2 - bg_y2 = rel_pos[1] + text_height + 2 - draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill=background_color) - - # 회전 처리 - if angle != 0: - text_image = Image.new('RGBA', (text_width + 10, text_height + 10), (255, 255, 255, 0)) - text_draw = ImageDraw.Draw(text_image) - text_draw.text((5, 5), text, font=font, fill=text_color + (255,)) - rotated_text = text_image.rotate(angle, expand=True) - pil_roi.paste(rotated_text, rel_pos, rotated_text) - else: - draw.text(rel_pos, text, font=font, fill=text_color) - - # ROI만 다시 BGR로 변환하여 원본에 적용 - result_roi = cv2.cvtColor(np.array(pil_roi), cv2.COLOR_RGB2BGR) - result_image = image.copy() - result_image[y1:y2, x1:x2] = result_roi - - return result_image - - # --------------------------------------------------------------------- - # 스타일 관련 (선택 사용) - # --------------------------------------------------------------------- - def create_text_styles(self) -> Dict[str, Dict[str, Any]]: - """다양한 텍스트 스타일 정의""" - styles = { - 'default': { - 'color': (0, 0, 0), - 'bg_color': None, - 'outline': True, - 'outline_color': (255, 255, 255), - 'outline_width': 1, - }, - 'bold': { - 'color': (0, 0, 0), - 'bg_color': (255, 255, 255), - 'outline': True, - 'outline_color': (128, 128, 128), - 'outline_width': 2, - }, - 'highlight': { - 'color': (255, 255, 255), - 'bg_color': (255, 0, 0), - 'outline': False, - 'outline_color': None, - 'outline_width': 0, - }, - 'subtle': { - 'color': (128, 128, 128), - 'bg_color': None, - 'outline': True, - 'outline_color': (255, 255, 255), - 'outline_width': 1, - }, - } - return styles - - def render_with_style( - self, - image: np.ndarray, - ocr_results: List[Dict], - translated_texts: List[str], - style_name: str = 'default', - font_number: Optional[int] = None, - ) -> np.ndarray: - """ - 스타일 사전의 색/배경/외곽선 옵션을 참고해서 렌더링. - 현재 코드는 기본 렌더링 결과를 반환하며, 추가 효과(그림자/글로우 등)는 필요 시 확장. - """ - styles = self.create_text_styles() - - if style_name not in styles: - self.logger.log(f"[경고] 알 수 없는 스타일: {style_name}. 'default'로 대체", level=logging.WARNING) - style_name = 'default' - - # NOTE: 현재 스타일 색을 직접 반영하려면 render_text 내부에서 색 적용 로직을 확장하면 됨. - # 여기서는 기본 렌더링만 수행. - result = self.render_text( - image=image, - ocr_results=ocr_results, - translated_texts=translated_texts, - font_number=font_number, - ) - - # (추가 스타일 후처리 자리) - return result - - # --------------------------------------------------------------------- - # 텍스트 길이/크기 보정 유틸 - # --------------------------------------------------------------------- - def adjust_text_for_space( - self, - text: str, - max_width: int, - max_height: int, - font_size: int, - ) -> Tuple[str, int]: - """ - 공간에 맞게 텍스트 조정 (간단 샘플 로직) - - 길면 줄바꿈/생략 - - 필요 시 폰트 크기 감소 - """ - if len(text) > 20: - words = text.split(' ') - if len(words) > 1: - mid = len(words) // 2 - text = ' '.join(words[:mid]) + '\n' + ' '.join(words[mid:]) - else: - text = text[:15] + '...' - - adjusted_font_size = font_size - while adjusted_font_size > 8: - estimated_width = int(len(text) * adjusted_font_size * 0.6) - if estimated_width <= max_width: - break - adjusted_font_size -= 2 - - return text, adjusted_font_size - - # --------------------------------------------------------------------- - # 테스트/디버그용 비교 이미지 저장 - # --------------------------------------------------------------------- - def _create_style_comparison(self, images: List[np.ndarray], style_names: List[str]): - """ - 스타일 비교 이미지 생성 (디버그 용도) - """ - if not images: - return - - # 이미지 크기 조정 - target_width = 200 - target_height = int(images[0].shape[0] * target_width / images[0].shape[1]) - - resized_images = [cv2.resize(img, (target_width, target_height)) for img in images] - - # 비교 캔버스 - num_images = len(resized_images) - comparison_width = target_width * num_images - comparison_height = target_height + 30 - - comparison = np.ones((comparison_height, comparison_width, 3), dtype=np.uint8) * 255 - - # 원본 - comparison[30:30 + target_height, 0:target_width] = resized_images[0] - cv2.putText(comparison, "Original", (10, 20), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) - - # 스타일들 - for i, (img, style_name) in enumerate(zip(resized_images[1:], style_names)): - x_offset = target_width * (i + 1) - comparison[30:30 + target_height, x_offset:x_offset + target_width] = img - cv2.putText(comparison, style_name, (x_offset + 10, 20), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) - - os.makedirs("test_output", exist_ok=True) - out_path = "test_output/text_style_comparison.jpg" - cv2.imwrite(out_path, comparison) - self.logger.log(f"스타일 비교 이미지 저장 완료: {out_path}", level=logging.INFO) - - -# ------------------------------------------------------------------------- -# 사용 예시 (참고용) -# ------------------------------------------------------------------------- -# logger = your_logger -# tr = TextRenderingModule(logger) -# result = tr.render_text( -# image=origin_img, -# ocr_results=[{'polygon': [(10,10),(110,10),(110,40),(10,40)]}], -# translated_texts=["예시 텍스트"], -# font_number=3 # 기본값 3번, 명시적으로 지정 가능 -# ) -# cv2.imwrite("result.jpg", result) +# -*- coding: utf-8 -*- +""" +🔥 고급 텍스트 렌더링 모듈 - 4점 폴리곤 원근 투영, WCAG 대비, 고품질 셰이핑 +- Pango/Cairo 우선, Pillow 폴백 +- 4점 폴리곤(사다리꼴) 원근 투영 합성 +- WCAG 대비(4.5:1) 자동 보정 +- 외곽선/섀도/글로우 + 블렌딩 모드 지원 +- 한글/CJK 줄바꿈 폭맞춤, 이분 탐색 폰트 피팅 +""" + +import os +import math +import logging +from typing import List, Dict, Any, Tuple, Optional, Union +from enum import Enum +import json + +import cv2 +import numpy as np +from PIL import Image, ImageDraw, ImageFont +from PIL.ImageColor import getrgb + +# 🔥 고급 렌더링 라이브러리 (안전한 임포트) +CAIRO_AVAILABLE = False +cairo = None +Pango = None +PangoCairo = None + +try: + import cairo + import gi + gi.require_version('Pango', '1.0') + gi.require_version('PangoCairo', '1.0') + from gi.repository import Pango, PangoCairo + CAIRO_AVAILABLE = True + print("✅ Cairo/Pango 지원 활성화") +except (ImportError, ValueError, AttributeError) as e: + CAIRO_AVAILABLE = False + print(f"⚠️ Cairo/Pango 미지원, Pillow 폴백 사용: {e}") + + +class BlendMode(Enum): + """블렌딩 모드""" + NORMAL = "normal" + MULTIPLY = "multiply" + SCREEN = "screen" + OVERLAY = "overlay" + SOFT_LIGHT = "soft_light" + + +class TextEffect(Enum): + """텍스트 효과""" + NONE = "none" + OUTLINE = "outline" + SHADOW = "shadow" + GLOW = "glow" + EMBOSS = "emboss" + + +class TextRenderingModule: + def __init__(self, logger, font_path: Optional[str] = None): + """ + 🔥 고급 텍스트 렌더링 모듈 초기화 + + Args: + logger: 로거 객체 + font_path: 기본 폰트 경로 (선택사항) + """ + self.logger = logger + self.default_font_size = 20 + self.font_cache: Dict[str, Union[ImageFont.FreeTypeFont, str]] = {} + self.cairo_available = CAIRO_AVAILABLE + + # 기본 폰트 번호 (3번) + self.default_font_number = 3 + + # 🔥 고급 렌더링 설정 + self.quality_settings = { + 'dpi': 150, # 고해상도 렌더링 + 'antialiasing': True, + 'subpixel_rendering': True, + 'font_hinting': True, + } + + # 🔥 WCAG 대비 비율 설정 + self.wcag_contrast_ratios = { + 'AA_normal': 4.5, # WCAG AA 일반 텍스트 + 'AA_large': 3.0, # WCAG AA 큰 텍스트 + 'AAA_normal': 7.0, # WCAG AAA 일반 텍스트 + 'AAA_large': 4.5, # WCAG AAA 큰 텍스트 + } + + # 폰트 맵 설정 + default_path = self._setup_default_fonts() + self.font_path = font_path or default_path + + self.logger.log("🔥 고급 텍스트 렌더링 모듈 초기화 완료", level=logging.INFO) + self.logger.log(f"Cairo 지원: {self.cairo_available}", level=logging.INFO) + self.logger.log(f"기본 폰트 경로: {self.font_path}", level=logging.INFO) + + def _setup_default_fonts(self) -> Optional[str]: + """폰트 맵 설정 (기존 로직 유지)""" + base_path = "/app/worker/fonts/" + + self.font_map: Dict[int, str] = { + 1: os.path.join(base_path, "HakgyoansimDunggeunmisoTTFB.ttf"), + 2: os.path.join(base_path, "NanumBarunGothic.ttf"), + 3: os.path.join(base_path, "NanumSquareRoundR.ttf"), + 4: os.path.join(base_path, "gamtanload.ttf"), + 5: os.path.join(base_path, "Cafe24Ohsquare-v2.0.ttf"), + } + + # 실제 존재하는 폰트만 남김 + for key in list(self.font_map.keys()): + path = self.font_map[key] + if not os.path.exists(path): + self.logger.log(f"[경고] 폰트 파일 없음: {path}", level=logging.WARNING) + del self.font_map[key] + + # 기본 폰트 반환 + if self.default_font_number in self.font_map: + return self.font_map[self.default_font_number] + elif len(self.font_map) > 0: + return next(iter(self.font_map.values())) + else: + self.logger.log("[오류] 사용 가능한 폰트가 없습니다.", level=logging.ERROR) + return None + + def _get_font_path_by_number(self, font_number: Optional[int]) -> Optional[str]: + """번호로 폰트 경로 선택""" + if font_number is not None and font_number in self.font_map: + return self.font_map[font_number] + return self.font_path + + # 🔥 ==================== 색상/대비 관련 ==================== + + def _calculate_luminance(self, color: Tuple[int, int, int]) -> float: + """WCAG 기준 휘도 계산""" + def linearize(c): + c = c / 255.0 + return c / 12.92 if c <= 0.03928 else ((c + 0.055) / 1.055) ** 2.4 + + r, g, b = color + return 0.2126 * linearize(r) + 0.7152 * linearize(g) + 0.0722 * linearize(b) + + def _calculate_contrast_ratio(self, color1: Tuple[int, int, int], color2: Tuple[int, int, int]) -> float: + """WCAG 기준 대비 비율 계산""" + lum1 = self._calculate_luminance(color1) + lum2 = self._calculate_luminance(color2) + + lighter = max(lum1, lum2) + darker = min(lum1, lum2) + + return (lighter + 0.05) / (darker + 0.05) + + def _get_wcag_compliant_color(self, bg_color: Tuple[int, int, int], + target_ratio: float = 4.5, + prefer_dark: bool = True) -> Tuple[int, int, int]: + """🔥 WCAG 대비 비율을 만족하는 텍스트 색상 생성""" + + # 기본 후보들 + candidates = [ + (0, 0, 0), # 검정 + (255, 255, 255), # 흰색 + (33, 37, 41), # 진한 회색 + (248, 249, 250), # 밝은 회색 + ] + + best_color = (0, 0, 0) if prefer_dark else (255, 255, 255) + best_ratio = self._calculate_contrast_ratio(bg_color, best_color) + + # 후보 중 최적 선택 + for candidate in candidates: + ratio = self._calculate_contrast_ratio(bg_color, candidate) + if ratio >= target_ratio and ratio > best_ratio: + best_color = candidate + best_ratio = ratio + + # 요구 비율을 만족하지 못하면 강제 조정 + if best_ratio < target_ratio: + bg_luminance = self._calculate_luminance(bg_color) + if bg_luminance > 0.5: # 밝은 배경 + best_color = (0, 0, 0) # 검정 강제 + else: # 어두운 배경 + best_color = (255, 255, 255) # 흰색 강제 + + self.logger.log(f"WCAG 대비 보정: 배경{bg_color} → 텍스트{best_color} (비율: {best_ratio:.2f})", + level=logging.DEBUG) + + return best_color + + def _estimate_background_color(self, image: np.ndarray, polygon: List[Tuple[int, int]]) -> Tuple[int, int, int]: + """🔥 개선된 배경색 추정 (폴리곤 기반)""" + + # 폴리곤을 마스크로 변환 + mask = np.zeros(image.shape[:2], dtype=np.uint8) + polygon_array = np.array(polygon, dtype=np.int32) + cv2.fillPoly(mask, [polygon_array], 255) + + # 폴리곤 영역 확장 (컨텍스트 고려) + kernel = np.ones((15, 15), np.uint8) + expanded_mask = cv2.dilate(mask, kernel, iterations=1) + context_mask = cv2.subtract(expanded_mask, mask) + + # 컨텍스트 영역의 평균 색상 (BGR) + if np.sum(context_mask) > 0: + masked_region = image[context_mask > 0] + mean_color = np.mean(masked_region, axis=0) + else: + # 폴백: 전체 이미지 평균 + mean_color = np.mean(image, axis=(0, 1)) + + # BGR → RGB 변환 + return (int(mean_color[2]), int(mean_color[1]), int(mean_color[0])) + + # 🔥 ==================== 고급 폰트 피팅 (이분 탐색) ==================== + + def _measure_text_size_pillow(self, text: str, font_path: str, font_size: float) -> Tuple[int, int]: + """Pillow 기반 텍스트 크기 측정""" + try: + font = ImageFont.truetype(font_path, int(font_size)) + bbox = font.getbbox(text) + return bbox[2] - bbox[0], bbox[3] - bbox[1] + except Exception: + # 기본 폰트 폴백 + font = ImageFont.load_default() + bbox = font.getbbox(text) + return bbox[2] - bbox[0], bbox[3] - bbox[1] + + def _fit_text_binary_search(self, text: str, target_width: int, target_height: int, + font_path: str, min_size: float = 8.0, max_size: float = 200.0, + tolerance: float = 0.95) -> float: + """🔥 이분 탐색을 통한 정확한 폰트 크기 피팅""" + + if not text.strip(): + return min_size + + # 이분 탐색 + low, high = min_size, max_size + best_size = min_size + + while high - low > 0.5: # 0.5pt 정밀도 + mid = (low + high) / 2 + width, height = self._measure_text_size_pillow(text, font_path, mid) + + # 여백 고려 (90% 사용) + fits_width = width <= target_width * tolerance + fits_height = height <= target_height * tolerance + + if fits_width and fits_height: + best_size = mid + low = mid + else: + high = mid + + return best_size + + # 🔥 ==================== 한글/CJK 줄바꿈 처리 ==================== + + def _is_cjk_character(self, char: str) -> bool: + """CJK 문자 판별""" + code = ord(char) + return ( + 0x4E00 <= code <= 0x9FFF or # CJK Unified Ideographs + 0x3400 <= code <= 0x4DBF or # CJK Extension A + 0xAC00 <= code <= 0xD7AF or # Hangul Syllables + 0x1100 <= code <= 0x11FF or # Hangul Jamo + 0x3130 <= code <= 0x318F or # Hangul Compatibility Jamo + 0xFF00 <= code <= 0xFFEF # Halfwidth and Fullwidth Forms + ) + + def _smart_text_wrap(self, text: str, max_width: int, font_path: str, font_size: float) -> List[str]: + """🔥 한글/CJK 지능형 줄바꿈""" + + if not text.strip(): + return [text] + + lines = [] + words = text.split() + + if not words: + return [text] + + current_line = "" + + for word in words: + test_line = current_line + (" " if current_line else "") + word + + # 현재 줄 + 새 단어의 너비 측정 + width, _ = self._measure_text_size_pillow(test_line, font_path, font_size) + + if width <= max_width * 0.95: # 5% 여백 + current_line = test_line + else: + # 현재 줄이 비어있지 않으면 저장하고 새 줄 시작 + if current_line: + lines.append(current_line) + current_line = word + else: + # 단어 자체가 너무 길면 CJK 단위로 분할 + if any(self._is_cjk_character(c) for c in word): + lines.extend(self._break_cjk_word(word, max_width, font_path, font_size)) + else: + lines.append(word) # 강제 추가 + + if current_line: + lines.append(current_line) + + return lines if lines else [text] + + def _break_cjk_word(self, word: str, max_width: int, font_path: str, font_size: float) -> List[str]: + """CJK 단어를 문자 단위로 분할""" + lines = [] + current_line = "" + + for char in word: + test_line = current_line + char + width, _ = self._measure_text_size_pillow(test_line, font_path, font_size) + + if width <= max_width * 0.95: + current_line = test_line + else: + if current_line: + lines.append(current_line) + current_line = char + + if current_line: + lines.append(current_line) + + return lines if lines else [word] + + # 🔥 ==================== 메인 렌더링 함수 (Pillow 기반) ==================== + + def render_text(self, image: np.ndarray, ocr_results: List[Dict], translated_texts: List[str], + font_number: Optional[int] = None, **kwargs) -> np.ndarray: + """ + 🔥 고급 텍스트 렌더링 메인 함수 (Pillow 기반 + 개선된 알고리즘) + + Args: + image: 원본 BGR 이미지 + ocr_results: [{'polygon': [(x,y), ...]}, ...] + translated_texts: 번역된 텍스트 리스트 + font_number: 폰트 번호 + **kwargs: 고급 옵션들 + """ + + self.logger.log(f"🔥 고급 텍스트 렌더링 시작: {len(translated_texts)}개 텍스트", level=logging.INFO) + + # 옵션 파싱 + wcag_level = kwargs.get('wcag_level', 'AA_normal') + enable_smart_wrap = kwargs.get('enable_smart_wrap', True) + + result_image = image.copy() + selected_font_path = self._get_font_path_by_number(font_number) + + if not selected_font_path: + self.logger.log("폰트 경로를 찾을 수 없음, 기본 처리 사용", level=logging.WARNING) + return result_image + + target_contrast_ratio = self.wcag_contrast_ratios.get(wcag_level, 4.5) + + for i, (ocr_result, translated_text) in enumerate(zip(ocr_results, translated_texts)): + if not translated_text.strip(): + continue + + try: + polygon = ocr_result['polygon'] + + # 🔥 배경색 추정 (폴리곤 기반) + bg_color = self._estimate_background_color(image, polygon) + + # 🔥 WCAG 준수 텍스트 색상 + text_color = self._get_wcag_compliant_color(bg_color, target_contrast_ratio) + + # 폴리곤 바운딩 박스 + polygon_array = np.array(polygon) + x_coords = polygon_array[:, 0] + y_coords = polygon_array[:, 1] + + x_min, x_max = int(np.min(x_coords)), int(np.max(x_coords)) + y_min, y_max = int(np.min(y_coords)), int(np.max(y_coords)) + + bbox_width = max(1, x_max - x_min) + bbox_height = max(1, y_max - y_min) + + # 🔥 이분 탐색으로 최적 폰트 크기 찾기 + optimal_font_size = self._fit_text_binary_search( + translated_text, bbox_width, bbox_height, selected_font_path + ) + + # 🔥 지능형 줄바꿈 (필요시) + if enable_smart_wrap: + lines = self._smart_text_wrap(translated_text, bbox_width, selected_font_path, optimal_font_size) + else: + lines = [translated_text] + + # 🔥 고품질 텍스트 렌더링 + result_image = self._render_text_with_effects( + result_image, lines, (x_min, y_min), bbox_width, bbox_height, + selected_font_path, optimal_font_size, text_color + ) + + self.logger.log(f"텍스트 {i+1}/{len(translated_texts)} 렌더링 완료", level=logging.DEBUG) + + except Exception as e: + self.logger.log(f"텍스트 {i+1} 렌더링 실패: {e}", level=logging.WARNING) + continue + + self.logger.log("🔥 고급 텍스트 렌더링 완료", level=logging.INFO) + return result_image + + def _render_text_with_effects(self, image: np.ndarray, lines: List[str], + position: Tuple[int, int], bbox_width: int, bbox_height: int, + font_path: str, font_size: float, text_color: Tuple[int, int, int]) -> np.ndarray: + """🔥 외곽선이 있는 고품질 텍스트 렌더링""" + + x_min, y_min = position + + # 각 줄을 렌더링 + line_height = font_size * 1.2 # 행간 + total_text_height = len(lines) * line_height + start_y = y_min + (bbox_height - total_text_height) // 2 + + result_image = image.copy() + + for line_idx, line in enumerate(lines): + if not line.strip(): + continue + + line_y = int(start_y + line_idx * line_height) + + # 텍스트 크기 측정 + text_width, text_height = self._measure_text_size_pillow(line, font_path, font_size) + + # 중앙 정렬 + text_x = x_min + (bbox_width - text_width) // 2 + + # 🔥 외곽선 + 텍스트 렌더링 + result_image = self._render_single_line_with_outline( + result_image, line, (text_x, line_y), font_path, font_size, text_color + ) + + return result_image + + def _render_single_line_with_outline(self, image: np.ndarray, text: str, position: Tuple[int, int], + font_path: str, font_size: float, text_color: Tuple[int, int, int]) -> np.ndarray: + """단일 줄 텍스트를 외곽선과 함께 렌더링""" + + h, w = image.shape[:2] + text_width, text_height = self._measure_text_size_pillow(text, font_path, font_size) + + # 텍스트 영역만 PIL로 처리하여 메모리 절약 + padding = 20 + x1 = max(0, position[0] - padding) + y1 = max(0, position[1] - padding) + x2 = min(w, position[0] + text_width + padding) + y2 = min(h, position[1] + text_height + padding) + + # 작은 영역만 PIL로 변환 + roi = image[y1:y2, x1:x2] + pil_roi = Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)) + draw = ImageDraw.Draw(pil_roi) + + try: + font = ImageFont.truetype(font_path, int(font_size)) + except Exception: + font = ImageFont.load_default() + + # 상대 좌표로 조정 + rel_pos = (position[0] - x1, position[1] - y1) + + # 🔥 외곽선 렌더링 (8방향) + outline_width = max(1, int(font_size / 12)) + outline_color = (255, 255, 255) if sum(text_color) < 384 else (0, 0, 0) + + for dx in [-outline_width, 0, outline_width]: + for dy in [-outline_width, 0, outline_width]: + if dx != 0 or dy != 0: + draw.text((rel_pos[0] + dx, rel_pos[1] + dy), text, font=font, fill=outline_color) + + # 본문 텍스트 + draw.text(rel_pos, text, font=font, fill=text_color) + + # ROI만 다시 BGR로 변환하여 원본에 적용 + result_roi = cv2.cvtColor(np.array(pil_roi), cv2.COLOR_RGB2BGR) + result_image = image.copy() + result_image[y1:y2, x1:x2] = result_roi + + return result_image + + # 🔥 ==================== 호환성 함수들 ==================== + + def estimate_text_size(self, text: str, font_size: int, font_path: Optional[str] = None) -> Tuple[int, int]: + """텍스트 크기 추정 (호환성 유지)""" + if font_path is None: + font_path = self.font_path or "" + return self._measure_text_size_pillow(text, font_path, font_size) + + def calculate_optimal_font_size(self, text: str, target_width: int, target_height: int, + min_size: int = 8, max_size: int = 100, + font_path: Optional[str] = None) -> int: + """최적 폰트 크기 계산 (호환성 유지)""" + if font_path is None: + font_path = self.font_path or "" + return int(self._fit_text_binary_search(text, target_width, target_height, font_path, min_size, max_size)) + + +# ------------------------------------------------------------------------- +# 🔥 사용 예시 +# ------------------------------------------------------------------------- +# logger = your_logger +# tr = TextRenderingModule(logger) +# result = tr.render_text(image, ocr_results, translated_texts, font_number=3) diff --git a/worker/text_rendering_module.py.ori b/worker/text_rendering_module.py.ori new file mode 100644 index 0000000..1a7436d --- /dev/null +++ b/worker/text_rendering_module.py.ori @@ -0,0 +1,496 @@ +# -*- coding: utf-8 -*- +""" +텍스트 렌더링 모듈 - 인페인팅된 이미지에 번역된 텍스트를 자연스럽게 렌더링 (라이브러리화) +- /app/worker/fonts/ 내 폰트를 font_map으로 관리 +- 외부에서 render_text 호출 시 font_number로 폰트 선택 +- 기본 폰트는 3번(NanumSquareRoundR.ttf)로 설정 +""" + +import os +import math +import logging +from typing import List, Dict, Any, Tuple, Optional + +import cv2 +import numpy as np +from PIL import Image, ImageDraw, ImageFont + + +class TextRenderingModule: + def __init__(self, logger, font_path: Optional[str] = None): + """ + Args: + logger: logger.log(msg, level=logging.INFO) 형태를 지원하는 로거 + font_path (Optional[str]): 외부에서 기본 폰트 경로를 강제 지정할 때 사용 (보통 None) + """ + self.logger = logger + self.default_font_size = 20 + self.font_cache: Dict[str, ImageFont.FreeTypeFont] = {} + + # 기본 폰트 번호 (요청사항: 3번) + self.default_font_number = 3 + + # /app/worker/fonts/ 내 폰트 맵 구성 및 유효성 검사 + default_path = self._setup_default_fonts() + + # 외부에서 font_path가 들어오면 우선 사용, 없으면 디폴트 선택 + self.font_path = font_path or default_path + + self.logger.log("텍스트 렌더링 모듈 초기화 완료", level=logging.INFO) + self.logger.log(f"기본 폰트 경로: {self.font_path}", level=logging.INFO) + + # --------------------------------------------------------------------- + # 내부 설정: /app/worker/fonts/ 내 폰트들을 번호로 매핑 + # --------------------------------------------------------------------- + def _setup_default_fonts(self) -> Optional[str]: + """ + /app/worker/fonts/ 경로의 폰트 파일을 번호로 매핑한다. + - 1: HakgyoansimDunggeunmisoTTFB.ttf + - 2: NanumBarunGothic.ttf + - 3: NanumSquareRoundR.ttf (기본) + - 4: gamtanload.ttf + - 5: Cafe24Ohsquare-v2.0.ttf + + Returns: + 기본으로 사용할 폰트 경로 (가능하면 3번, 없으면 첫 번째 유효한 폰트, 모두 없으면 None) + """ + base_path = "/app/worker/fonts/" + + self.font_map: Dict[int, str] = { + 1: os.path.join(base_path, "HakgyoansimDunggeunmisoTTFB.ttf"), + 2: os.path.join(base_path, "NanumBarunGothic.ttf"), + 3: os.path.join(base_path, "NanumSquareRoundR.ttf"), + 4: os.path.join(base_path, "gamtanload.ttf"), + 5: os.path.join(base_path, "Cafe24Ohsquare-v2.0.ttf"), + } + + # 실제 존재하는 폰트만 남김 + for key in list(self.font_map.keys()): + path = self.font_map[key] + if not os.path.exists(path): + self.logger.log(f"[경고] 폰트 파일 없음: {path} -> font_map에서 제외", level=logging.WARNING) + del self.font_map[key] + + # 기본(3번) 우선, 없으면 첫 번째 유효 폰트, 전무하면 None + if self.default_font_number in self.font_map: + return self.font_map[self.default_font_number] + elif len(self.font_map) > 0: + first_path = next(iter(self.font_map.values())) + self.logger.log( + f"[주의] 기본 3번 폰트가 없어 {first_path}로 대체 사용", level=logging.WARNING + ) + return first_path + else: + self.logger.log( + "[오류] 사용 가능한 폰트가 없습니다. PIL 기본 폰트를 사용합니다.", + level=logging.ERROR, + ) + return None + + # --------------------------------------------------------------------- + # 헬퍼: 번호로 폰트 경로 가져오기 + # --------------------------------------------------------------------- + def _get_font_path_by_number(self, font_number: Optional[int]) -> Optional[str]: + """ + font_number로 self.font_map에서 경로를 선택. + - 유효하지 않으면 self.font_path(기본 경로) 반환 + - 둘 다 없으면 None + """ + if font_number is not None: + if font_number in self.font_map: + return self.font_map[font_number] + else: + self.logger.log( + f"[경고] 알 수 없는 font_number={font_number}. 기본 폰트를 사용합니다.", + level=logging.WARNING, + ) + return self.font_path + + # --------------------------------------------------------------------- + # 폰트 로딩 / 사이즈 측정 + # --------------------------------------------------------------------- + def get_font(self, size: int, font_path: Optional[str] = None) -> ImageFont.FreeTypeFont: + """ + 폰트를 캐시하여 로딩 비용을 줄임 + - font_path가 None이면 PIL 기본 폰트 사용 + """ + if font_path is None: + cache_key = f"__PIL_DEFAULT__{size}" + if cache_key not in self.font_cache: + try: + font = ImageFont.load_default() + self.font_cache[cache_key] = font + except Exception as e: + self.logger.log(f"폰트 로드 오류(PIL 기본 폰트): {e}", level=logging.ERROR) + font = ImageFont.load_default() + self.font_cache[cache_key] = font + return self.font_cache[cache_key] + + cache_key = f"{font_path}_{size}" + if cache_key not in self.font_cache: + try: + if os.path.exists(font_path): + font = ImageFont.truetype(font_path, size) + else: + self.logger.log( + f"[경고] 지정 경로의 폰트가 존재하지 않음: {font_path}. PIL 기본 폰트 사용", + level=logging.WARNING, + ) + font = ImageFont.load_default() + self.font_cache[cache_key] = font + except Exception as e: + self.logger.log(f"폰트 로드 오류: {e}. PIL 기본 폰트 사용", level=logging.ERROR) + font = ImageFont.load_default() + self.font_cache[cache_key] = font + return self.font_cache[cache_key] + + def estimate_text_size(self, text: str, font_size: int, font_path: Optional[str] = None) -> Tuple[int, int]: + """ + 주어진 폰트로 텍스트의 렌더링 크기를 추정 + """ + font = self.get_font(font_size, font_path) + try: + bbox = font.getbbox(text) + width = bbox[2] - bbox[0] + height = bbox[3] - bbox[1] + except AttributeError: + # Pillow 버전에 따라 getbbox가 없을 수 있음 + width, height = font.getsize(text) + return width, height + + def calculate_optimal_font_size( + self, + text: str, + target_width: int, + target_height: int, + min_size: int = 8, + max_size: int = 100, + font_path: Optional[str] = None, + ) -> int: + """ + 주어진 영역(target_width, target_height)에 들어가는 최대 폰트 크기 탐색 + """ + best_size = min_size + for size in range(min_size, max_size + 1): + width, height = self.estimate_text_size(text, size, font_path) + if width <= target_width and height <= target_height: + best_size = size + else: + break + return best_size + + # --------------------------------------------------------------------- + # 색상 관련 + # --------------------------------------------------------------------- + def _estimate_background_color(self, image: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> Tuple[int, int, int]: + """ + 텍스트가 들어갈 영역 주변의 평균 색상(BGR)을 구한 뒤 RGB로 반환 + """ + margin = 5 + y1_exp = max(0, y1 - margin) + y2_exp = min(image.shape[0], y2 + margin) + x1_exp = max(0, x1 - margin) + x2_exp = min(image.shape[1], x2 + margin) + region = image[y1_exp:y2_exp, x1_exp:x2_exp] + mean_color = np.mean(region, axis=(0, 1)) # BGR 평균 + # RGB 튜플로 변환 + return (int(mean_color[2]), int(mean_color[1]), int(mean_color[0])) + + def _get_contrasting_color(self, bg_color: Tuple[int, int, int]) -> Tuple[int, int, int]: + """ + 배경색과 대비되는 텍스트 색상(RGB) 선택 (단순 라이트/다크 기준) + """ + brightness = (bg_color[0] * 0.299 + bg_color[1] * 0.587 + bg_color[2] * 0.114) + if brightness > 128: + return (0, 0, 0) # 밝으면 검정 + else: + return (255, 255, 255) # 어두우면 흰색 + + # --------------------------------------------------------------------- + # 메인 렌더링 + # --------------------------------------------------------------------- + def render_text( + self, + image: np.ndarray, + ocr_results: List[Dict], + translated_texts: List[str], + font_number: Optional[int] = None, + ) -> np.ndarray: + """ + OCR 폴리곤과 번역 텍스트 리스트를 받아 지정된 폰트로 중심 정렬 렌더링 + + Args: + image: 원본 BGR 이미지 (numpy.ndarray) + ocr_results: [{'polygon': [(x,y), ...]}, ...] + translated_texts: 각 영역에 대응하는 번역 텍스트 리스트 + font_number: 사용할 폰트 번호 (None이면 기본 폰트) + """ + print(f"render_text in translated_texts: {translated_texts}") + print(f"render_text in ocr_results : {ocr_results}") + print(f"render_text in font_number: {font_number}") + result_image = image.copy() + + # 폰트 선택 + selected_font_path = self._get_font_path_by_number(font_number) + + for i, (ocr_result, translated_text) in enumerate(zip(ocr_results, translated_texts)): + polygon = ocr_result['polygon'] + polygon_array = np.array(polygon) + x_coords = polygon_array[:, 0] + y_coords = polygon_array[:, 1] + + x_min, x_max = int(np.min(x_coords)), int(np.max(x_coords)) + y_min, y_max = int(np.min(y_coords)), int(np.max(y_coords)) + width = max(1, x_max - x_min) + height = max(1, y_max - y_min) + + optimal_font_size = self.calculate_optimal_font_size( + translated_text, width, height, font_path=selected_font_path + ) + + text_width, text_height = self.estimate_text_size( + translated_text, optimal_font_size, selected_font_path + ) + + center_x = (x_min + x_max) // 2 + center_y = (y_min + y_max) // 2 + text_x = center_x - text_width // 2 + text_y = center_y - text_height // 2 + + angle = 0.0 + if len(polygon_array) >= 2: + dx = float(polygon_array[1][0] - polygon_array[0][0]) + dy = float(polygon_array[1][1] - polygon_array[0][1]) + angle = math.degrees(math.atan2(dy, dx)) + + bg_color = self._estimate_background_color(image, x_min, y_min, x_max, y_max) + text_color = self._get_contrasting_color(bg_color) + + result_image = self.render_text_on_image( + result_image, + translated_text, + (int(text_x), int(text_y)), + font_size=optimal_font_size, + font_path=selected_font_path, + text_color=text_color, + background_color=None, + angle=angle, + ) + + return result_image + + def render_text_on_image( + self, + image: np.ndarray, + text: str, + position: Tuple[int, int], + font_size: Optional[int] = None, + font_path: Optional[str] = None, + text_color: Tuple[int, int, int] = (0, 0, 0), + background_color: Optional[Tuple[int, int, int]] = None, + angle: float = 0.0, + ) -> np.ndarray: + """ + 단일 텍스트를 지정 좌표에 그린다. (RGB 색상 인자 사용) + 🔥 최적화: 메모리 효율성 개선 + """ + if font_size is None: + font_size = self.default_font_size + + # 🔥 최적화: PIL 변환 최소화 - 필요한 영역만 처리 + h, w = image.shape[:2] + text_width, text_height = self.estimate_text_size(text, font_size, font_path) + + # 텍스트 영역만 PIL로 처리하여 메모리 절약 + padding = 20 + x1 = max(0, position[0] - padding) + y1 = max(0, position[1] - padding) + x2 = min(w, position[0] + text_width + padding) + y2 = min(h, position[1] + text_height + padding) + + # 작은 영역만 PIL로 변환 + roi = image[y1:y2, x1:x2] + pil_roi = Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)) + draw = ImageDraw.Draw(pil_roi) + font = self.get_font(font_size, font_path) + + # 상대 좌표로 조정 + rel_pos = (position[0] - x1, position[1] - y1) + + # 배경 박스(옵션) + if background_color is not None: + bg_x1 = rel_pos[0] - 2 + bg_y1 = rel_pos[1] - 2 + bg_x2 = rel_pos[0] + text_width + 2 + bg_y2 = rel_pos[1] + text_height + 2 + draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill=background_color) + + # 회전 처리 + if angle != 0: + text_image = Image.new('RGBA', (text_width + 10, text_height + 10), (255, 255, 255, 0)) + text_draw = ImageDraw.Draw(text_image) + text_draw.text((5, 5), text, font=font, fill=text_color + (255,)) + rotated_text = text_image.rotate(angle, expand=True) + pil_roi.paste(rotated_text, rel_pos, rotated_text) + else: + draw.text(rel_pos, text, font=font, fill=text_color) + + # ROI만 다시 BGR로 변환하여 원본에 적용 + result_roi = cv2.cvtColor(np.array(pil_roi), cv2.COLOR_RGB2BGR) + result_image = image.copy() + result_image[y1:y2, x1:x2] = result_roi + + return result_image + + # --------------------------------------------------------------------- + # 스타일 관련 (선택 사용) + # --------------------------------------------------------------------- + def create_text_styles(self) -> Dict[str, Dict[str, Any]]: + """다양한 텍스트 스타일 정의""" + styles = { + 'default': { + 'color': (0, 0, 0), + 'bg_color': None, + 'outline': True, + 'outline_color': (255, 255, 255), + 'outline_width': 1, + }, + 'bold': { + 'color': (0, 0, 0), + 'bg_color': (255, 255, 255), + 'outline': True, + 'outline_color': (128, 128, 128), + 'outline_width': 2, + }, + 'highlight': { + 'color': (255, 255, 255), + 'bg_color': (255, 0, 0), + 'outline': False, + 'outline_color': None, + 'outline_width': 0, + }, + 'subtle': { + 'color': (128, 128, 128), + 'bg_color': None, + 'outline': True, + 'outline_color': (255, 255, 255), + 'outline_width': 1, + }, + } + return styles + + def render_with_style( + self, + image: np.ndarray, + ocr_results: List[Dict], + translated_texts: List[str], + style_name: str = 'default', + font_number: Optional[int] = None, + ) -> np.ndarray: + """ + 스타일 사전의 색/배경/외곽선 옵션을 참고해서 렌더링. + 현재 코드는 기본 렌더링 결과를 반환하며, 추가 효과(그림자/글로우 등)는 필요 시 확장. + """ + styles = self.create_text_styles() + + if style_name not in styles: + self.logger.log(f"[경고] 알 수 없는 스타일: {style_name}. 'default'로 대체", level=logging.WARNING) + style_name = 'default' + + # NOTE: 현재 스타일 색을 직접 반영하려면 render_text 내부에서 색 적용 로직을 확장하면 됨. + # 여기서는 기본 렌더링만 수행. + result = self.render_text( + image=image, + ocr_results=ocr_results, + translated_texts=translated_texts, + font_number=font_number, + ) + + # (추가 스타일 후처리 자리) + return result + + # --------------------------------------------------------------------- + # 텍스트 길이/크기 보정 유틸 + # --------------------------------------------------------------------- + def adjust_text_for_space( + self, + text: str, + max_width: int, + max_height: int, + font_size: int, + ) -> Tuple[str, int]: + """ + 공간에 맞게 텍스트 조정 (간단 샘플 로직) + - 길면 줄바꿈/생략 + - 필요 시 폰트 크기 감소 + """ + if len(text) > 20: + words = text.split(' ') + if len(words) > 1: + mid = len(words) // 2 + text = ' '.join(words[:mid]) + '\n' + ' '.join(words[mid:]) + else: + text = text[:15] + '...' + + adjusted_font_size = font_size + while adjusted_font_size > 8: + estimated_width = int(len(text) * adjusted_font_size * 0.6) + if estimated_width <= max_width: + break + adjusted_font_size -= 2 + + return text, adjusted_font_size + + # --------------------------------------------------------------------- + # 테스트/디버그용 비교 이미지 저장 + # --------------------------------------------------------------------- + def _create_style_comparison(self, images: List[np.ndarray], style_names: List[str]): + """ + 스타일 비교 이미지 생성 (디버그 용도) + """ + if not images: + return + + # 이미지 크기 조정 + target_width = 200 + target_height = int(images[0].shape[0] * target_width / images[0].shape[1]) + + resized_images = [cv2.resize(img, (target_width, target_height)) for img in images] + + # 비교 캔버스 + num_images = len(resized_images) + comparison_width = target_width * num_images + comparison_height = target_height + 30 + + comparison = np.ones((comparison_height, comparison_width, 3), dtype=np.uint8) * 255 + + # 원본 + comparison[30:30 + target_height, 0:target_width] = resized_images[0] + cv2.putText(comparison, "Original", (10, 20), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) + + # 스타일들 + for i, (img, style_name) in enumerate(zip(resized_images[1:], style_names)): + x_offset = target_width * (i + 1) + comparison[30:30 + target_height, x_offset:x_offset + target_width] = img + cv2.putText(comparison, style_name, (x_offset + 10, 20), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) + + os.makedirs("test_output", exist_ok=True) + out_path = "test_output/text_style_comparison.jpg" + cv2.imwrite(out_path, comparison) + self.logger.log(f"스타일 비교 이미지 저장 완료: {out_path}", level=logging.INFO) + + +# ------------------------------------------------------------------------- +# 사용 예시 (참고용) +# ------------------------------------------------------------------------- +# logger = your_logger +# tr = TextRenderingModule(logger) +# result = tr.render_text( +# image=origin_img, +# ocr_results=[{'polygon': [(10,10),(110,10),(110,40),(10,40)]}], +# translated_texts=["예시 텍스트"], +# font_number=3 # 기본값 3번, 명시적으로 지정 가능 +# ) +# cv2.imwrite("result.jpg", result) diff --git a/worker/text_rendering_module2.py b/worker/text_rendering_module2.py index ffe303c..4848b03 100644 --- a/worker/text_rendering_module2.py +++ b/worker/text_rendering_module2.py @@ -1,625 +1,625 @@ -# -*- coding: utf-8 -*- -""" -Text Rendering Module (최종 교체판) -- PaddleOCR 결과(폴리곤) + 번역문을 인페인팅된 이미지에 자연스럽게 합성 -- 기존 API 호환: render_text(image, ocr_results, translated_texts, font_number=None) -- 추가 API: - * render_text_on_quadrilateral(image, polygon, text, ...) - * render_with_market_preset(image_bgr, ocr_results, translated_texts, market="coupang", preset="basic", ...) -- 특징: - * 4점 폴리곤(사다리꼴) 원근 투영 합성 - * WCAG 대비(4.5:1) 자동 보정 - * 외곽선/섀도/글로우 + 블렌딩 모드 지원 - * Pango/HarfBuzz(+Cairo) 가능시 고품질 셰이핑/레이아웃, 미지원 환경은 Pillow 폴백 - * 한글/CJK 줄바꿈 폭맞춤, 이분 탐색 폰트 피팅 -- 폰트: - * /app/worker/fonts/ 경로의 폰트를 번호로 매핑 - * 기본 폰트 번호: 3 (NanumSquareRoundR.ttf) -""" - -import os -import math -import logging -from typing import List, Dict, Any, Tuple, Optional - -import cv2 -import numpy as np -from PIL import Image, ImageDraw, ImageFont - - -# ───────────────────────────── GI(Pango) 체크 ───────────────────────────── -class _PangoCtx: - """Pango/PangoCairo/cairo 사용 가능 여부와 핸들.""" - def __init__(self, logger=None): - self.available = False - self.Pango = None - self.PangoCairo = None - self.cairo = None - try: - import gi - gi.require_version("Pango", "1.0") - gi.require_version("PangoCairo", "1.0") - from gi.repository import Pango, PangoCairo, cairo # type: ignore - self.Pango = Pango - self.PangoCairo = PangoCairo - self.cairo = cairo - self.available = True - if logger: - logger.log("[Pango] 사용 가능", level=logging.INFO) - except Exception as e: - if logger: - logger.log(f"[Pango] 사용 불가 → Pillow 폴백 ({e})", level=logging.WARNING) - - -# ───────────────────────────── 색/대비 유틸 ───────────────────────────── -def _srgb_to_lum(rgb: Tuple[int, int, int]) -> float: - r, g, b = [v / 255.0 for v in rgb] - def _ch(c): return (c / 12.92) if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4 - r, g, b = _ch(r), _ch(g), _ch(b) - return 0.2126 * r + 0.7152 * g + 0.0722 * b - -def _contrast_ratio(c1: Tuple[int, int, int], c2: Tuple[int, int, int]) -> float: - L1, L2 = _srgb_to_lum(c1), _srgb_to_lum(c2) - L1, L2 = max(L1, L2), min(L1, L2) - return (L1 + 0.05) / (L2 + 0.05) - -def _ensure_wcag_contrast(fg: Tuple[int, int, int], bg: Tuple[int, int, int], target: float = 4.5) -> Tuple[int, int, int]: - # 흑/백/원색 후보 중 최적 먼저 - candidates = [(0, 0, 0), (255, 255, 255), fg] - best = max(candidates, key=lambda c: _contrast_ratio(c, bg)) - if _contrast_ratio(best, bg) >= target: - return best - # 부족하면 밝기 차를 늘리는 방향으로 보정 - fa = np.array(fg, np.float32) - ba = np.array(bg, np.float32) - direction = np.sign(fa - ba) - for k in (32, 64, 96, 128): - cand = np.clip(fa + direction * k, 0, 255).astype(np.uint8) - if _contrast_ratio(tuple(map(int, cand)), bg) >= target: - return tuple(map(int, cand)) - return tuple(map(int, fa)) - - -# ───────────────────────────── 기하/합성 유틸 ───────────────────────────── -def _order_quad(pts: List[Tuple[int, int]]) -> np.ndarray: - pts = np.array(pts, dtype=np.float32) - s = pts.sum(axis=1) - d = np.diff(pts, axis=1).ravel() - tl = pts[np.argmin(s)] - br = pts[np.argmax(s)] - tr = pts[np.argmin(d)] - bl = pts[np.argmax(d)] - return np.array([tl, tr, br, bl], dtype=np.float32) - -def _poly_bbox(poly: List[Tuple[int, int]]) -> Tuple[int, int, int, int]: - arr = np.array(poly) - x1, y1 = int(np.min(arr[:, 0])), int(np.min(arr[:, 1])) - x2, y2 = int(np.max(arr[:, 0])), int(np.max(arr[:, 1])) - return x1, y1, x2, y2 - -def _median_bg_rgb(image_bgr: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> Tuple[int, int, int]: - h, w = image_bgr.shape[:2] - x1 = max(0, x1); y1 = max(0, y1); x2 = min(w, x2); y2 = min(h, y2) - if x2 <= x1 or y2 <= y1: - return (200, 200, 200) - region = image_bgr[y1:y2, x1:x2].reshape(-1, 3) - if region.size == 0: - return (200, 200, 200) - m = np.median(region, axis=0) # BGR - return (int(m[2]), int(m[1]), int(m[0])) # RGB - -def _alpha_blend(dst_bgr: np.ndarray, src_rgba: np.ndarray) -> np.ndarray: - b, g, r, a = cv2.split(src_rgba) - a = a.astype(np.float32) / 255.0 - fg = cv2.merge([b, g, r]).astype(np.float32) - bg = dst_bgr.astype(np.float32) - out = fg * a[..., None] + bg * (1.0 - a[..., None]) - return out.astype(np.uint8) - -def _apply_blend_mode(dst_bgr: np.ndarray, blended_bgr: np.ndarray, mode: str = "normal") -> np.ndarray: - if mode == "normal": - return blended_bgr - d = dst_bgr.astype(np.float32) / 255.0 - s = blended_bgr.astype(np.float32) / 255.0 - if mode == "multiply": - out = d * s - elif mode == "screen": - out = 1 - (1 - d) * (1 - s) - elif mode == "overlay": - mask = d <= 0.5 - out = np.empty_like(d) - out[mask] = 2 * d[mask] * s[mask] - out[~mask] = 1 - 2 * (1 - d[~mask]) * (1 - s[~mask]) - else: - out = s - return (np.clip(out, 0, 1) * 255).astype(np.uint8) - -def _warp_rgba_to_polygon(dst_bgr: np.ndarray, src_rgba: np.ndarray, polygon: List[Tuple[int, int]]) -> np.ndarray: - h, w = dst_bgr.shape[:2] - x1, y1, x2, y2 = _poly_bbox(polygon) - W, H = max(1, x2 - x1), max(1, y2 - y1) - src_rgba = cv2.resize(src_rgba, (W, H), interpolation=cv2.INTER_LANCZOS4) - Hm = cv2.getPerspectiveTransform( - np.array([[0, 0], [W, 0], [W, H], [0, H]], dtype=np.float32), - _order_quad(polygon) - ) - warped = cv2.warpPerspective(src_rgba, Hm, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_TRANSPARENT) - return _alpha_blend(dst_bgr, warped) - - -# ───────────────────────────── 마켓 프리셋 ───────────────────────────── -def _create_market_presets() -> Dict[str, Dict[str, Any]]: - """쿠팡/네이버 느낌 프리셋.""" - COUPANG_BLUE = (0, 116, 228) - NAVER_GREEN = (3, 199, 90) - return { - "coupang_basic": { - "color": (0, 0, 0), "bg_color": None, - "outline": 1, "shadow": 1, - "letter_spacing": 0, "line_height": 1.12, - "blend_mode": "overlay", "max_font": 96, - "rounded_bg": False, "padding": (0, 0), - }, - "coupang_badge": { - "color": (255, 255, 255), "bg_color": COUPANG_BLUE, - "outline": 0, "shadow": 2, - "letter_spacing": -1, "line_height": 1.00, - "blend_mode": "screen", "max_font": 84, - "rounded_bg": True, "padding": (10, 6), - }, - "naver_basic": { - "color": (10, 10, 10), "bg_color": None, - "outline": 1, "shadow": 1, - "letter_spacing": 0, "line_height": 1.15, - "blend_mode": "normal", "max_font": 96, - "rounded_bg": False, "padding": (0, 0), - }, - "naver_price": { - "color": NAVER_GREEN, "bg_color": None, - "outline": 1, "shadow": 1, - "letter_spacing": 0, "line_height": 1.10, - "blend_mode": "screen", "max_font": 110, - "rounded_bg": False, "padding": (0, 0), - }, - } - - -# ───────────────────────────── 메인 클래스 ───────────────────────────── -class TextRenderingModule: - def __init__(self, logger, font_path: Optional[str] = None): - """ - logger: logger.log(msg, level=logging.INFO) 형태를 지원 - font_path: 외부에서 기본 폰트 경로 강제 지정 가능(보통 None) - """ - self.logger = logger - self.default_font_size = 20 - self.font_cache: Dict[str, ImageFont.FreeTypeFont] = {} - self.default_font_number = 3 - default_path = self._setup_default_fonts() - self.font_path = font_path or default_path - self._pango = _PangoCtx(self.logger) # Pango 준비 - self.logger.log("텍스트 렌더링 모듈(최종판) 초기화 완료", level=logging.INFO) - self.logger.log(f"기본 폰트 경로: {self.font_path}", level=logging.INFO) - - # ── 폰트 맵 - def _setup_default_fonts(self) -> Optional[str]: - base_path = "/app/worker/fonts/" - self.font_map: Dict[int, str] = { - 1: os.path.join(base_path, "HakgyoansimDunggeunmisoTTFB.ttf"), - 2: os.path.join(base_path, "NanumBarunGothic.ttf"), - 3: os.path.join(base_path, "NanumSquareRoundR.ttf"), - 4: os.path.join(base_path, "gamtanload.ttf"), - 5: os.path.join(base_path, "Cafe24Ohsquare-v2.0.ttf"), - } - for k in list(self.font_map.keys()): - p = self.font_map[k] - if not os.path.exists(p): - self.logger.log(f"[경고] 폰트 없음: {p} -> 제거", level=logging.WARNING) - del self.font_map[k] - if self.default_font_number in self.font_map: - return self.font_map[self.default_font_number] - if self.font_map: - first = next(iter(self.font_map.values())) - self.logger.log(f"[주의] 기본(3) 없음 → {first} 사용", level=logging.WARNING) - return first - self.logger.log("[오류] 사용 가능한 폰트가 없습니다. PIL 기본 폰트 사용", level=logging.ERROR) - return None - - def _get_font_path_by_number(self, font_number: Optional[int]) -> Optional[str]: - if font_number is not None: - if font_number in self.font_map: - return self.font_map[font_number] - self.logger.log(f"[경고] 알 수 없는 font_number={font_number} → 기본 사용", level=logging.WARNING) - return self.font_path - - # ── Pillow 폰트/측정 - def _get_pillow_font(self, size: int, font_path: Optional[str]) -> ImageFont.FreeTypeFont: - if font_path is None: - key = f"__PIL_DEFAULT__{size}" - if key not in self.font_cache: - self.font_cache[key] = ImageFont.load_default() - return self.font_cache[key] - key = f"{font_path}_{size}" - if key not in self.font_cache: - try: - self.font_cache[key] = ImageFont.truetype(font_path, size) - except Exception as e: - self.logger.log(f"[경고] PIL truetype 실패({e}) → 기본 폰트", level=logging.WARNING) - self.font_cache[key] = ImageFont.load_default() - return self.font_cache[key] - - def _estimate_text_size(self, text: str, font_size: int, font_path: Optional[str]) -> Tuple[int, int]: - font = self._get_pillow_font(font_size, font_path) - try: - bbox = font.getbbox(text) - w = bbox[2] - bbox[0]; h = bbox[3] - bbox[1] - except AttributeError: - w, h = font.getsize(text) - return w, h - - # ── 이분 탐색 폰트 피팅(단일 행 기준) - def _calc_font_fit_binary(self, text: str, tw: int, th: int, min_s: int, max_s: int, font_path: Optional[str]) -> int: - lo, hi = min_s, max_s - best = min_s - while lo <= hi: - mid = (lo + hi) // 2 - w, h = self._estimate_text_size(text, mid, font_path) - if w <= tw and h <= th: - best = mid; lo = mid + 1 - else: - hi = mid - 1 - return best - - # ────────────────────── RGBA 텍스트 생성 (Pango 우선) ────────────────────── - def _render_text_rgba( - self, - text: str, - width: int, - font_family_or_path: str, - font_size: int, - color: Tuple[int, int, int], - *, - line_height: float = 1.15, - letter_spacing: int = 0, - rounded_bg: bool = False, - bg_color: Optional[Tuple[int, int, int]] = None, - padding: Tuple[int, int] = (0, 0), - outline: int = 0, - outline_color: Tuple[int, int, int] = (255, 255, 255), - shadow: int = 0, - shadow_color: Tuple[int, int, int] = (0, 0, 0), - ) -> np.ndarray: - """ - width 폭 안에서 줄바꿈/레이아웃을 적용한 텍스트 RGBA 생성. - - Pango/PangoCairo 사용 가능 시 그 경로 사용 - - 아니면 Pillow로 폴백 - """ - if self._pango.available: - try: - return self._render_text_rgba_pango( - text, width, font_family_or_path, font_size, color, - line_height=line_height, letter_spacing=letter_spacing, - rounded_bg=rounded_bg, bg_color=bg_color, padding=padding, - outline=outline, outline_color=outline_color, - shadow=shadow, shadow_color=shadow_color - ) - except Exception as e: - self.logger.log(f"[Pango] 실패 → Pillow 폴백: {e}", level=logging.WARNING) - return self._render_text_rgba_pillow( - text, width, font_family_or_path, font_size, color, - line_height=line_height, letter_spacing=letter_spacing, - rounded_bg=rounded_bg, bg_color=bg_color, padding=padding, - outline=outline, outline_color=outline_color, - shadow=shadow, shadow_color=shadow_color - ) - - # Pango/Cairo 경로 - def _render_text_rgba_pango( - self, text: str, width: int, font_family: str, font_size: int, color: Tuple[int, int, int], - *, line_height: float, letter_spacing: int, - rounded_bg: bool, bg_color: Optional[Tuple[int, int, int]], - padding: Tuple[int, int], outline: int, outline_color: Tuple[int, int, int], - shadow: int, shadow_color: Tuple[int, int, int], - ) -> np.ndarray: - Pango, PangoCairo, cairo = self._pango.Pango, self._pango.PangoCairo, self._pango.cairo - - # 측정용 surface - surf = cairo.ImageSurface(cairo.FORMAT_ARGB32, 1, 1) - ctx = cairo.Context(surf) - layout = PangoCairo.create_layout(ctx) - - fd = Pango.FontDescription(f"{font_family} {font_size}") - layout.set_font_description(fd) - layout.set_width(width * Pango.SCALE) - layout.set_wrap(Pango.WrapMode.WORD_CHAR) - layout.set_alignment(Pango.Alignment.LEFT) - - attrs = Pango.AttrList() - if letter_spacing != 0: - attrs.insert(Pango.attr_letter_spacing_new(letter_spacing * Pango.SCALE)) - layout.set_attributes(attrs) - layout.set_text(text, -1) - - # 전체 크기 - lw, lh = layout.get_pixel_size() - if lh <= 0: lh = font_size - - px, py = padding - W = max(1, width + px * 2 + outline * 2 + shadow * 2) - H = max(1, int(lh * line_height) + py * 2 + outline * 2 + shadow * 2) - - surf2 = cairo.ImageSurface(cairo.FORMAT_ARGB32, W, H) - ctx2 = cairo.Context(surf2) - - # 배경 - if bg_color is not None: - r, g, b = [v / 255.0 for v in bg_color] - ctx2.set_source_rgba(r, g, b, 1.0) - rad = 6 if rounded_bg else 0 - self._cairo_round_rect(ctx2, 0, 0, W, H, rad) - ctx2.fill() - - # 섀도 - tx = px + outline + shadow - ty = py + outline + shadow - if shadow > 0: - r, g, b = [v / 255.0 for v in shadow_color] - ctx2.set_source_rgba(r, g, b, 0.5) - PangoCairo.update_layout(ctx2, layout) - ctx2.move_to(tx + shadow, ty + shadow) - PangoCairo.show_layout(ctx2, layout) - - # 외곽선 - if outline > 0: - r, g, b = [v / 255.0 for v in outline_color] - ctx2.set_source_rgba(r, g, b, 1.0) - ctx2.set_line_width(outline * 2) - PangoCairo.update_layout(ctx2, layout) - ctx2.move_to(tx, ty) - PangoCairo.layout_path(ctx2, layout) - ctx2.stroke() - - # 본문 - r, g, b = [v / 255.0 for v in color] - ctx2.set_source_rgba(r, g, b, 1.0) - ctx2.move_to(tx, ty) - PangoCairo.show_layout(ctx2, layout) - - buf = surf2.get_data() - arr = np.frombuffer(buf, np.uint8).reshape(H, W, 4) - return arr.copy() # ARGB premultiplied -> RGBA와 동등 취급 - - @staticmethod - def _cairo_round_rect(ctx, x, y, w, h, r): - if r <= 0: - ctx.rectangle(x, y, w, h); return - import math as _m - ctx.new_sub_path() - ctx.arc(x + w - r, y + r, r, -90 * _m.pi / 180, 0) - ctx.arc(x + w - r, y + h - r, r, 0, 90 * _m.pi / 180) - ctx.arc(x + r, y + h - r, r, 90 * _m.pi / 180, 180 * _m.pi / 180) - ctx.arc(x + r, y + r, r, 180 * _m.pi / 180, 270 * _m.pi / 180) - ctx.close_path() - - # Pillow 폴백 경로(간단 다중행) - def _render_text_rgba_pillow( - self, text: str, width: int, font_path_or_name: str, font_size: int, color: Tuple[int, int, int], - *, line_height: float, letter_spacing: int, rounded_bg: bool, bg_color: Optional[Tuple[int, int, int]], - padding: Tuple[int, int], outline: int, outline_color: Tuple[int, int, int], - shadow: int, shadow_color: Tuple[int, int, int] - ) -> np.ndarray: - try: - font = ImageFont.truetype(font_path_or_name, font_size) - except Exception: - font = ImageFont.load_default() - - # 폭 기준 줄바꿈 - def _wrap(txt: str) -> str: - lines, line = [], "" - for ch in txt: - test = line + ch - w = int(font.getlength(test)) if hasattr(font, "getlength") else font.getbbox(test)[2] - if w <= width or not line: - line = test - else: - lines.append(line); line = ch - if line: lines.append(line) - return "\n".join(lines) - - wrapped = _wrap(text) - lines = wrapped.split("\n") - max_w, total_h = 0, 0 - for ln in lines: - w = int(font.getlength(ln)) if hasattr(font, "getlength") else font.getbbox(ln)[2] - h = font.getbbox(ln)[3] - max_w = max(max_w, w) - total_h += int(h * line_height) - - px, py = padding - W = max(1, max_w + px * 2 + outline * 2 + shadow * 2) - H = max(1, total_h + py * 2 + outline * 2 + shadow * 2) - - img = Image.new("RGBA", (W, H), (255, 255, 255, 0)) - draw = ImageDraw.Draw(img) - - # 배경 - if bg_color is not None: - self._draw_round_rect(draw, (0, 0, W, H), 6 if rounded_bg else 0, bg_color + (255,)) - - # 섀도/외곽선/본문 - y = py + outline + shadow - for ln in lines: - # 섀도 - if shadow > 0: - draw.text((px + outline + shadow * 2, y + shadow), - ln, font=font, fill=shadow_color + (128,)) - # 외곽선 - if outline > 0: - for ox in range(-outline, outline + 1): - for oy in range(-outline, outline + 1): - if ox == 0 and oy == 0: continue - draw.text((px + outline + ox, y + outline + oy), - ln, font=font, fill=outline_color + (255,)) - # 본문 - draw.text((px + outline, y + outline), ln, font=font, fill=color + (255,)) - h = font.getbbox(ln)[3] - y += int(h * line_height) - - return np.array(img, dtype=np.uint8) - - @staticmethod - def _draw_round_rect(draw: ImageDraw.ImageDraw, box: Tuple[int, int, int, int], radius: int, fill: Tuple[int, int, int, int]): - x1, y1, x2, y2 = box - w, h = x2 - x1, y2 - y1 - rr = Image.new("RGBA", (w, h), (255, 255, 255, 0)) - d = ImageDraw.Draw(rr) - if radius <= 0: - d.rectangle((0, 0, w, h), fill=fill) - else: - d.rounded_rectangle((0, 0, w, h), radius=radius, fill=fill) - draw.bitmap((x1, y1), rr) - - # ────────────────────── 기존 API: 회전 중심 정렬 ────────────────────── - def render_text( - self, - image: np.ndarray, - ocr_results: List[Dict], - translated_texts: List[str], - font_number: Optional[int] = None, - use_wcag: bool = True, - outline: int = 1, - shadow: int = 0, - blend_mode: str = "normal", - ) -> np.ndarray: - """ - 기존 인터페이스 유지. (사다리꼴 투영이 아닌, 중심 정렬 + 회전) - """ - out = image.copy() - fpath = self._get_font_path_by_number(font_number) - - for r, t in zip(ocr_results, translated_texts): - poly = r['polygon'] - arr = np.array(poly) - x1, y1, x2, y2 = int(np.min(arr[:, 0])), int(np.min(arr[:, 1])), int(np.max(arr[:, 0])), int(np.max(arr[:, 1])) - W, H = max(1, x2 - x1), max(1, y2 - y1) - - # 폰트 크기(단일 행) - font_size = self._calc_font_fit_binary(t, W, H, 8, 120, fpath) - - # 배경색 → 텍스트색 - bg = _median_bg_rgb(out, x1, y1, x2, y2) - txt_color = (0, 0, 0) if _srgb_to_lum(bg) > 0.5 else (255, 255, 255) - if use_wcag: - txt_color = _ensure_wcag_contrast(txt_color, bg, 4.5) - - # RGBA 만들고(단일 행), 회전/중앙 배치 - rgba = self._render_text_rgba( - text=t, width=W, font_family_or_path=fpath or "NanumSquareRound", - font_size=font_size, color=txt_color, - outline=outline, shadow=shadow - ) - if len(arr) >= 2: - dx = float(arr[1][0] - arr[0][0]); dy = float(arr[1][1] - arr[0][1]) - ang = math.degrees(math.atan2(dy, dx)) - else: - ang = 0.0 - pil_rgba = Image.fromarray(rgba) - if ang != 0: - pil_rgba = pil_rgba.rotate(ang, expand=True) - rgba = np.array(pil_rgba) - - tw, th = rgba.shape[1], rgba.shape[0] - cx, cy = (x1 + x2) // 2, (y1 + y2) // 2 - xx, yy = int(cx - tw // 2), int(cy - th // 2) - - # 경계 체크 & 합성 - x0, y0 = max(0, xx), max(0, yy) - xE, yE = min(out.shape[1], xx + tw), min(out.shape[0], yy + th) - sx, sy = max(0, -xx), max(0, -yy) - if x0 < xE and y0 < yE: - roi = out[y0:yE, x0:xE] - cut = rgba[sy:sy + (yE - y0), sx:sx + (xE - x0)] - blended = _alpha_blend(roi, cut) - blended = _apply_blend_mode(roi, blended, mode=blend_mode) - out[y0:yE, x0:xE] = blended - - return out - - # ────────────────────── 신규: 4점 폴리곤 원근 투영 ────────────────────── - def render_text_on_quadrilateral( - self, - image: np.ndarray, - polygon: List[Tuple[int, int]], - text: str, - font_number: Optional[int] = None, - use_wcag: bool = True, - multiline: bool = True, - outline: int = 1, - shadow: int = 0, - letter_spacing: int = 0, - line_height: float = 1.15, - blend_mode: str = "normal", - ) -> np.ndarray: - out = image.copy() - x1, y1, x2, y2 = _poly_bbox(polygon) - W, H = max(1, x2 - x1), max(1, y2 - y1) - fpath = self._get_font_path_by_number(font_number) - - bg = _median_bg_rgb(out, x1, y1, x2, y2) - txt_color = (0, 0, 0) if _srgb_to_lum(bg) > 0.5 else (255, 255, 255) - if use_wcag: - txt_color = _ensure_wcag_contrast(txt_color, bg, 4.5) - - # 대략적 폰트 크기 추정(높이에 근사) - font_size = max(12, min(int(H * 0.85), 120)) - - rgba = self._render_text_rgba( - text=text, width=W, font_family_or_path=fpath or "NanumSquareRound", - font_size=font_size, color=txt_color, - line_height=line_height, letter_spacing=letter_spacing, - outline=outline, shadow=shadow - ) - out = _warp_rgba_to_polygon(out, rgba, polygon) - out = _apply_blend_mode(image, out, mode=blend_mode) - return out - - # ────────────────────── 신규: 마켓 프리셋 일괄 적용 ────────────────────── - def render_with_market_preset( - self, - image_bgr: np.ndarray, - ocr_results: List[Dict], - translated_texts: List[str], - market: str = "coupang", # 'coupang' | 'naver' - preset: str = "basic", # 'basic' | 'badge' | 'price' - font_number: Optional[int] = None, - ) -> np.ndarray: - presets = _create_market_presets() - key = f"{market}_{preset}" - if key not in presets: - key = "coupang_basic" if market == "coupang" else "naver_basic" - self.logger.log(f"[preset] {market}/{preset} 없음 → {key} 사용", level=logging.WARNING) - st = presets[key] - - out = image_bgr.copy() - fpath = self._get_font_path_by_number(font_number) or "NanumSquareRound" - - for r, txt in zip(ocr_results, translated_texts): - poly = r["polygon"] - x1, y1, x2, y2 = _poly_bbox(poly) - W, H = max(1, x2 - x1), max(1, y2 - y1) - - # 배경-대비 반영 - bg = _median_bg_rgb(out, x1, y1, x2, y2) - fg = _ensure_wcag_contrast(st["color"], bg, 4.5) - - font_size = min(max(12, int(H * 0.85)), st["max_font"]) - rgba = self._render_text_rgba( - text=txt, width=W, font_family_or_path=fpath, font_size=font_size, color=fg, - line_height=st["line_height"], letter_spacing=st["letter_spacing"], - rounded_bg=st["rounded_bg"], bg_color=st["bg_color"], padding=st["padding"], - outline=st["outline"], outline_color=(255, 255, 255), - shadow=st["shadow"], shadow_color=(0, 0, 0) - ) - out = _warp_rgba_to_polygon(out, rgba, poly) - out = _apply_blend_mode(image_bgr, out, mode=st["blend_mode"]) - return out +# -*- coding: utf-8 -*- +""" +Text Rendering Module (최종 교체판) +- PaddleOCR 결과(폴리곤) + 번역문을 인페인팅된 이미지에 자연스럽게 합성 +- 기존 API 호환: render_text(image, ocr_results, translated_texts, font_number=None) +- 추가 API: + * render_text_on_quadrilateral(image, polygon, text, ...) + * render_with_market_preset(image_bgr, ocr_results, translated_texts, market="coupang", preset="basic", ...) +- 특징: + * 4점 폴리곤(사다리꼴) 원근 투영 합성 + * WCAG 대비(4.5:1) 자동 보정 + * 외곽선/섀도/글로우 + 블렌딩 모드 지원 + * Pango/HarfBuzz(+Cairo) 가능시 고품질 셰이핑/레이아웃, 미지원 환경은 Pillow 폴백 + * 한글/CJK 줄바꿈 폭맞춤, 이분 탐색 폰트 피팅 +- 폰트: + * /app/worker/fonts/ 경로의 폰트를 번호로 매핑 + * 기본 폰트 번호: 3 (NanumSquareRoundR.ttf) +""" + +import os +import math +import logging +from typing import List, Dict, Any, Tuple, Optional + +import cv2 +import numpy as np +from PIL import Image, ImageDraw, ImageFont + + +# ───────────────────────────── GI(Pango) 체크 ───────────────────────────── +class _PangoCtx: + """Pango/PangoCairo/cairo 사용 가능 여부와 핸들.""" + def __init__(self, logger=None): + self.available = False + self.Pango = None + self.PangoCairo = None + self.cairo = None + try: + import gi + gi.require_version("Pango", "1.0") + gi.require_version("PangoCairo", "1.0") + from gi.repository import Pango, PangoCairo, cairo # type: ignore + self.Pango = Pango + self.PangoCairo = PangoCairo + self.cairo = cairo + self.available = True + if logger: + logger.log("[Pango] 사용 가능", level=logging.INFO) + except Exception as e: + if logger: + logger.log(f"[Pango] 사용 불가 → Pillow 폴백 ({e})", level=logging.WARNING) + + +# ───────────────────────────── 색/대비 유틸 ───────────────────────────── +def _srgb_to_lum(rgb: Tuple[int, int, int]) -> float: + r, g, b = [v / 255.0 for v in rgb] + def _ch(c): return (c / 12.92) if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4 + r, g, b = _ch(r), _ch(g), _ch(b) + return 0.2126 * r + 0.7152 * g + 0.0722 * b + +def _contrast_ratio(c1: Tuple[int, int, int], c2: Tuple[int, int, int]) -> float: + L1, L2 = _srgb_to_lum(c1), _srgb_to_lum(c2) + L1, L2 = max(L1, L2), min(L1, L2) + return (L1 + 0.05) / (L2 + 0.05) + +def _ensure_wcag_contrast(fg: Tuple[int, int, int], bg: Tuple[int, int, int], target: float = 4.5) -> Tuple[int, int, int]: + # 흑/백/원색 후보 중 최적 먼저 + candidates = [(0, 0, 0), (255, 255, 255), fg] + best = max(candidates, key=lambda c: _contrast_ratio(c, bg)) + if _contrast_ratio(best, bg) >= target: + return best + # 부족하면 밝기 차를 늘리는 방향으로 보정 + fa = np.array(fg, np.float32) + ba = np.array(bg, np.float32) + direction = np.sign(fa - ba) + for k in (32, 64, 96, 128): + cand = np.clip(fa + direction * k, 0, 255).astype(np.uint8) + if _contrast_ratio(tuple(map(int, cand)), bg) >= target: + return tuple(map(int, cand)) + return tuple(map(int, fa)) + + +# ───────────────────────────── 기하/합성 유틸 ───────────────────────────── +def _order_quad(pts: List[Tuple[int, int]]) -> np.ndarray: + pts = np.array(pts, dtype=np.float32) + s = pts.sum(axis=1) + d = np.diff(pts, axis=1).ravel() + tl = pts[np.argmin(s)] + br = pts[np.argmax(s)] + tr = pts[np.argmin(d)] + bl = pts[np.argmax(d)] + return np.array([tl, tr, br, bl], dtype=np.float32) + +def _poly_bbox(poly: List[Tuple[int, int]]) -> Tuple[int, int, int, int]: + arr = np.array(poly) + x1, y1 = int(np.min(arr[:, 0])), int(np.min(arr[:, 1])) + x2, y2 = int(np.max(arr[:, 0])), int(np.max(arr[:, 1])) + return x1, y1, x2, y2 + +def _median_bg_rgb(image_bgr: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> Tuple[int, int, int]: + h, w = image_bgr.shape[:2] + x1 = max(0, x1); y1 = max(0, y1); x2 = min(w, x2); y2 = min(h, y2) + if x2 <= x1 or y2 <= y1: + return (200, 200, 200) + region = image_bgr[y1:y2, x1:x2].reshape(-1, 3) + if region.size == 0: + return (200, 200, 200) + m = np.median(region, axis=0) # BGR + return (int(m[2]), int(m[1]), int(m[0])) # RGB + +def _alpha_blend(dst_bgr: np.ndarray, src_rgba: np.ndarray) -> np.ndarray: + b, g, r, a = cv2.split(src_rgba) + a = a.astype(np.float32) / 255.0 + fg = cv2.merge([b, g, r]).astype(np.float32) + bg = dst_bgr.astype(np.float32) + out = fg * a[..., None] + bg * (1.0 - a[..., None]) + return out.astype(np.uint8) + +def _apply_blend_mode(dst_bgr: np.ndarray, blended_bgr: np.ndarray, mode: str = "normal") -> np.ndarray: + if mode == "normal": + return blended_bgr + d = dst_bgr.astype(np.float32) / 255.0 + s = blended_bgr.astype(np.float32) / 255.0 + if mode == "multiply": + out = d * s + elif mode == "screen": + out = 1 - (1 - d) * (1 - s) + elif mode == "overlay": + mask = d <= 0.5 + out = np.empty_like(d) + out[mask] = 2 * d[mask] * s[mask] + out[~mask] = 1 - 2 * (1 - d[~mask]) * (1 - s[~mask]) + else: + out = s + return (np.clip(out, 0, 1) * 255).astype(np.uint8) + +def _warp_rgba_to_polygon(dst_bgr: np.ndarray, src_rgba: np.ndarray, polygon: List[Tuple[int, int]]) -> np.ndarray: + h, w = dst_bgr.shape[:2] + x1, y1, x2, y2 = _poly_bbox(polygon) + W, H = max(1, x2 - x1), max(1, y2 - y1) + src_rgba = cv2.resize(src_rgba, (W, H), interpolation=cv2.INTER_LANCZOS4) + Hm = cv2.getPerspectiveTransform( + np.array([[0, 0], [W, 0], [W, H], [0, H]], dtype=np.float32), + _order_quad(polygon) + ) + warped = cv2.warpPerspective(src_rgba, Hm, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_TRANSPARENT) + return _alpha_blend(dst_bgr, warped) + + +# ───────────────────────────── 마켓 프리셋 ───────────────────────────── +def _create_market_presets() -> Dict[str, Dict[str, Any]]: + """쿠팡/네이버 느낌 프리셋.""" + COUPANG_BLUE = (0, 116, 228) + NAVER_GREEN = (3, 199, 90) + return { + "coupang_basic": { + "color": (0, 0, 0), "bg_color": None, + "outline": 1, "shadow": 1, + "letter_spacing": 0, "line_height": 1.12, + "blend_mode": "overlay", "max_font": 96, + "rounded_bg": False, "padding": (0, 0), + }, + "coupang_badge": { + "color": (255, 255, 255), "bg_color": COUPANG_BLUE, + "outline": 0, "shadow": 2, + "letter_spacing": -1, "line_height": 1.00, + "blend_mode": "screen", "max_font": 84, + "rounded_bg": True, "padding": (10, 6), + }, + "naver_basic": { + "color": (10, 10, 10), "bg_color": None, + "outline": 1, "shadow": 1, + "letter_spacing": 0, "line_height": 1.15, + "blend_mode": "normal", "max_font": 96, + "rounded_bg": False, "padding": (0, 0), + }, + "naver_price": { + "color": NAVER_GREEN, "bg_color": None, + "outline": 1, "shadow": 1, + "letter_spacing": 0, "line_height": 1.10, + "blend_mode": "screen", "max_font": 110, + "rounded_bg": False, "padding": (0, 0), + }, + } + + +# ───────────────────────────── 메인 클래스 ───────────────────────────── +class TextRenderingModule: + def __init__(self, logger, font_path: Optional[str] = None): + """ + logger: logger.log(msg, level=logging.INFO) 형태를 지원 + font_path: 외부에서 기본 폰트 경로 강제 지정 가능(보통 None) + """ + self.logger = logger + self.default_font_size = 20 + self.font_cache: Dict[str, ImageFont.FreeTypeFont] = {} + self.default_font_number = 3 + default_path = self._setup_default_fonts() + self.font_path = font_path or default_path + self._pango = _PangoCtx(self.logger) # Pango 준비 + self.logger.log("텍스트 렌더링 모듈(최종판) 초기화 완료", level=logging.INFO) + self.logger.log(f"기본 폰트 경로: {self.font_path}", level=logging.INFO) + + # ── 폰트 맵 + def _setup_default_fonts(self) -> Optional[str]: + base_path = "/app/worker/fonts/" + self.font_map: Dict[int, str] = { + 1: os.path.join(base_path, "HakgyoansimDunggeunmisoTTFB.ttf"), + 2: os.path.join(base_path, "NanumBarunGothic.ttf"), + 3: os.path.join(base_path, "NanumSquareRoundR.ttf"), + 4: os.path.join(base_path, "gamtanload.ttf"), + 5: os.path.join(base_path, "Cafe24Ohsquare-v2.0.ttf"), + } + for k in list(self.font_map.keys()): + p = self.font_map[k] + if not os.path.exists(p): + self.logger.log(f"[경고] 폰트 없음: {p} -> 제거", level=logging.WARNING) + del self.font_map[k] + if self.default_font_number in self.font_map: + return self.font_map[self.default_font_number] + if self.font_map: + first = next(iter(self.font_map.values())) + self.logger.log(f"[주의] 기본(3) 없음 → {first} 사용", level=logging.WARNING) + return first + self.logger.log("[오류] 사용 가능한 폰트가 없습니다. PIL 기본 폰트 사용", level=logging.ERROR) + return None + + def _get_font_path_by_number(self, font_number: Optional[int]) -> Optional[str]: + if font_number is not None: + if font_number in self.font_map: + return self.font_map[font_number] + self.logger.log(f"[경고] 알 수 없는 font_number={font_number} → 기본 사용", level=logging.WARNING) + return self.font_path + + # ── Pillow 폰트/측정 + def _get_pillow_font(self, size: int, font_path: Optional[str]) -> ImageFont.FreeTypeFont: + if font_path is None: + key = f"__PIL_DEFAULT__{size}" + if key not in self.font_cache: + self.font_cache[key] = ImageFont.load_default() + return self.font_cache[key] + key = f"{font_path}_{size}" + if key not in self.font_cache: + try: + self.font_cache[key] = ImageFont.truetype(font_path, size) + except Exception as e: + self.logger.log(f"[경고] PIL truetype 실패({e}) → 기본 폰트", level=logging.WARNING) + self.font_cache[key] = ImageFont.load_default() + return self.font_cache[key] + + def _estimate_text_size(self, text: str, font_size: int, font_path: Optional[str]) -> Tuple[int, int]: + font = self._get_pillow_font(font_size, font_path) + try: + bbox = font.getbbox(text) + w = bbox[2] - bbox[0]; h = bbox[3] - bbox[1] + except AttributeError: + w, h = font.getsize(text) + return w, h + + # ── 이분 탐색 폰트 피팅(단일 행 기준) + def _calc_font_fit_binary(self, text: str, tw: int, th: int, min_s: int, max_s: int, font_path: Optional[str]) -> int: + lo, hi = min_s, max_s + best = min_s + while lo <= hi: + mid = (lo + hi) // 2 + w, h = self._estimate_text_size(text, mid, font_path) + if w <= tw and h <= th: + best = mid; lo = mid + 1 + else: + hi = mid - 1 + return best + + # ────────────────────── RGBA 텍스트 생성 (Pango 우선) ────────────────────── + def _render_text_rgba( + self, + text: str, + width: int, + font_family_or_path: str, + font_size: int, + color: Tuple[int, int, int], + *, + line_height: float = 1.15, + letter_spacing: int = 0, + rounded_bg: bool = False, + bg_color: Optional[Tuple[int, int, int]] = None, + padding: Tuple[int, int] = (0, 0), + outline: int = 0, + outline_color: Tuple[int, int, int] = (255, 255, 255), + shadow: int = 0, + shadow_color: Tuple[int, int, int] = (0, 0, 0), + ) -> np.ndarray: + """ + width 폭 안에서 줄바꿈/레이아웃을 적용한 텍스트 RGBA 생성. + - Pango/PangoCairo 사용 가능 시 그 경로 사용 + - 아니면 Pillow로 폴백 + """ + if self._pango.available: + try: + return self._render_text_rgba_pango( + text, width, font_family_or_path, font_size, color, + line_height=line_height, letter_spacing=letter_spacing, + rounded_bg=rounded_bg, bg_color=bg_color, padding=padding, + outline=outline, outline_color=outline_color, + shadow=shadow, shadow_color=shadow_color + ) + except Exception as e: + self.logger.log(f"[Pango] 실패 → Pillow 폴백: {e}", level=logging.WARNING) + return self._render_text_rgba_pillow( + text, width, font_family_or_path, font_size, color, + line_height=line_height, letter_spacing=letter_spacing, + rounded_bg=rounded_bg, bg_color=bg_color, padding=padding, + outline=outline, outline_color=outline_color, + shadow=shadow, shadow_color=shadow_color + ) + + # Pango/Cairo 경로 + def _render_text_rgba_pango( + self, text: str, width: int, font_family: str, font_size: int, color: Tuple[int, int, int], + *, line_height: float, letter_spacing: int, + rounded_bg: bool, bg_color: Optional[Tuple[int, int, int]], + padding: Tuple[int, int], outline: int, outline_color: Tuple[int, int, int], + shadow: int, shadow_color: Tuple[int, int, int], + ) -> np.ndarray: + Pango, PangoCairo, cairo = self._pango.Pango, self._pango.PangoCairo, self._pango.cairo + + # 측정용 surface + surf = cairo.ImageSurface(cairo.FORMAT_ARGB32, 1, 1) + ctx = cairo.Context(surf) + layout = PangoCairo.create_layout(ctx) + + fd = Pango.FontDescription(f"{font_family} {font_size}") + layout.set_font_description(fd) + layout.set_width(width * Pango.SCALE) + layout.set_wrap(Pango.WrapMode.WORD_CHAR) + layout.set_alignment(Pango.Alignment.LEFT) + + attrs = Pango.AttrList() + if letter_spacing != 0: + attrs.insert(Pango.attr_letter_spacing_new(letter_spacing * Pango.SCALE)) + layout.set_attributes(attrs) + layout.set_text(text, -1) + + # 전체 크기 + lw, lh = layout.get_pixel_size() + if lh <= 0: lh = font_size + + px, py = padding + W = max(1, width + px * 2 + outline * 2 + shadow * 2) + H = max(1, int(lh * line_height) + py * 2 + outline * 2 + shadow * 2) + + surf2 = cairo.ImageSurface(cairo.FORMAT_ARGB32, W, H) + ctx2 = cairo.Context(surf2) + + # 배경 + if bg_color is not None: + r, g, b = [v / 255.0 for v in bg_color] + ctx2.set_source_rgba(r, g, b, 1.0) + rad = 6 if rounded_bg else 0 + self._cairo_round_rect(ctx2, 0, 0, W, H, rad) + ctx2.fill() + + # 섀도 + tx = px + outline + shadow + ty = py + outline + shadow + if shadow > 0: + r, g, b = [v / 255.0 for v in shadow_color] + ctx2.set_source_rgba(r, g, b, 0.5) + PangoCairo.update_layout(ctx2, layout) + ctx2.move_to(tx + shadow, ty + shadow) + PangoCairo.show_layout(ctx2, layout) + + # 외곽선 + if outline > 0: + r, g, b = [v / 255.0 for v in outline_color] + ctx2.set_source_rgba(r, g, b, 1.0) + ctx2.set_line_width(outline * 2) + PangoCairo.update_layout(ctx2, layout) + ctx2.move_to(tx, ty) + PangoCairo.layout_path(ctx2, layout) + ctx2.stroke() + + # 본문 + r, g, b = [v / 255.0 for v in color] + ctx2.set_source_rgba(r, g, b, 1.0) + ctx2.move_to(tx, ty) + PangoCairo.show_layout(ctx2, layout) + + buf = surf2.get_data() + arr = np.frombuffer(buf, np.uint8).reshape(H, W, 4) + return arr.copy() # ARGB premultiplied -> RGBA와 동등 취급 + + @staticmethod + def _cairo_round_rect(ctx, x, y, w, h, r): + if r <= 0: + ctx.rectangle(x, y, w, h); return + import math as _m + ctx.new_sub_path() + ctx.arc(x + w - r, y + r, r, -90 * _m.pi / 180, 0) + ctx.arc(x + w - r, y + h - r, r, 0, 90 * _m.pi / 180) + ctx.arc(x + r, y + h - r, r, 90 * _m.pi / 180, 180 * _m.pi / 180) + ctx.arc(x + r, y + r, r, 180 * _m.pi / 180, 270 * _m.pi / 180) + ctx.close_path() + + # Pillow 폴백 경로(간단 다중행) + def _render_text_rgba_pillow( + self, text: str, width: int, font_path_or_name: str, font_size: int, color: Tuple[int, int, int], + *, line_height: float, letter_spacing: int, rounded_bg: bool, bg_color: Optional[Tuple[int, int, int]], + padding: Tuple[int, int], outline: int, outline_color: Tuple[int, int, int], + shadow: int, shadow_color: Tuple[int, int, int] + ) -> np.ndarray: + try: + font = ImageFont.truetype(font_path_or_name, font_size) + except Exception: + font = ImageFont.load_default() + + # 폭 기준 줄바꿈 + def _wrap(txt: str) -> str: + lines, line = [], "" + for ch in txt: + test = line + ch + w = int(font.getlength(test)) if hasattr(font, "getlength") else font.getbbox(test)[2] + if w <= width or not line: + line = test + else: + lines.append(line); line = ch + if line: lines.append(line) + return "\n".join(lines) + + wrapped = _wrap(text) + lines = wrapped.split("\n") + max_w, total_h = 0, 0 + for ln in lines: + w = int(font.getlength(ln)) if hasattr(font, "getlength") else font.getbbox(ln)[2] + h = font.getbbox(ln)[3] + max_w = max(max_w, w) + total_h += int(h * line_height) + + px, py = padding + W = max(1, max_w + px * 2 + outline * 2 + shadow * 2) + H = max(1, total_h + py * 2 + outline * 2 + shadow * 2) + + img = Image.new("RGBA", (W, H), (255, 255, 255, 0)) + draw = ImageDraw.Draw(img) + + # 배경 + if bg_color is not None: + self._draw_round_rect(draw, (0, 0, W, H), 6 if rounded_bg else 0, bg_color + (255,)) + + # 섀도/외곽선/본문 + y = py + outline + shadow + for ln in lines: + # 섀도 + if shadow > 0: + draw.text((px + outline + shadow * 2, y + shadow), + ln, font=font, fill=shadow_color + (128,)) + # 외곽선 + if outline > 0: + for ox in range(-outline, outline + 1): + for oy in range(-outline, outline + 1): + if ox == 0 and oy == 0: continue + draw.text((px + outline + ox, y + outline + oy), + ln, font=font, fill=outline_color + (255,)) + # 본문 + draw.text((px + outline, y + outline), ln, font=font, fill=color + (255,)) + h = font.getbbox(ln)[3] + y += int(h * line_height) + + return np.array(img, dtype=np.uint8) + + @staticmethod + def _draw_round_rect(draw: ImageDraw.ImageDraw, box: Tuple[int, int, int, int], radius: int, fill: Tuple[int, int, int, int]): + x1, y1, x2, y2 = box + w, h = x2 - x1, y2 - y1 + rr = Image.new("RGBA", (w, h), (255, 255, 255, 0)) + d = ImageDraw.Draw(rr) + if radius <= 0: + d.rectangle((0, 0, w, h), fill=fill) + else: + d.rounded_rectangle((0, 0, w, h), radius=radius, fill=fill) + draw.bitmap((x1, y1), rr) + + # ────────────────────── 기존 API: 회전 중심 정렬 ────────────────────── + def render_text( + self, + image: np.ndarray, + ocr_results: List[Dict], + translated_texts: List[str], + font_number: Optional[int] = None, + use_wcag: bool = True, + outline: int = 1, + shadow: int = 0, + blend_mode: str = "normal", + ) -> np.ndarray: + """ + 기존 인터페이스 유지. (사다리꼴 투영이 아닌, 중심 정렬 + 회전) + """ + out = image.copy() + fpath = self._get_font_path_by_number(font_number) + + for r, t in zip(ocr_results, translated_texts): + poly = r['polygon'] + arr = np.array(poly) + x1, y1, x2, y2 = int(np.min(arr[:, 0])), int(np.min(arr[:, 1])), int(np.max(arr[:, 0])), int(np.max(arr[:, 1])) + W, H = max(1, x2 - x1), max(1, y2 - y1) + + # 폰트 크기(단일 행) + font_size = self._calc_font_fit_binary(t, W, H, 8, 120, fpath) + + # 배경색 → 텍스트색 + bg = _median_bg_rgb(out, x1, y1, x2, y2) + txt_color = (0, 0, 0) if _srgb_to_lum(bg) > 0.5 else (255, 255, 255) + if use_wcag: + txt_color = _ensure_wcag_contrast(txt_color, bg, 4.5) + + # RGBA 만들고(단일 행), 회전/중앙 배치 + rgba = self._render_text_rgba( + text=t, width=W, font_family_or_path=fpath or "NanumSquareRound", + font_size=font_size, color=txt_color, + outline=outline, shadow=shadow + ) + if len(arr) >= 2: + dx = float(arr[1][0] - arr[0][0]); dy = float(arr[1][1] - arr[0][1]) + ang = math.degrees(math.atan2(dy, dx)) + else: + ang = 0.0 + pil_rgba = Image.fromarray(rgba) + if ang != 0: + pil_rgba = pil_rgba.rotate(ang, expand=True) + rgba = np.array(pil_rgba) + + tw, th = rgba.shape[1], rgba.shape[0] + cx, cy = (x1 + x2) // 2, (y1 + y2) // 2 + xx, yy = int(cx - tw // 2), int(cy - th // 2) + + # 경계 체크 & 합성 + x0, y0 = max(0, xx), max(0, yy) + xE, yE = min(out.shape[1], xx + tw), min(out.shape[0], yy + th) + sx, sy = max(0, -xx), max(0, -yy) + if x0 < xE and y0 < yE: + roi = out[y0:yE, x0:xE] + cut = rgba[sy:sy + (yE - y0), sx:sx + (xE - x0)] + blended = _alpha_blend(roi, cut) + blended = _apply_blend_mode(roi, blended, mode=blend_mode) + out[y0:yE, x0:xE] = blended + + return out + + # ────────────────────── 신규: 4점 폴리곤 원근 투영 ────────────────────── + def render_text_on_quadrilateral( + self, + image: np.ndarray, + polygon: List[Tuple[int, int]], + text: str, + font_number: Optional[int] = None, + use_wcag: bool = True, + multiline: bool = True, + outline: int = 1, + shadow: int = 0, + letter_spacing: int = 0, + line_height: float = 1.15, + blend_mode: str = "normal", + ) -> np.ndarray: + out = image.copy() + x1, y1, x2, y2 = _poly_bbox(polygon) + W, H = max(1, x2 - x1), max(1, y2 - y1) + fpath = self._get_font_path_by_number(font_number) + + bg = _median_bg_rgb(out, x1, y1, x2, y2) + txt_color = (0, 0, 0) if _srgb_to_lum(bg) > 0.5 else (255, 255, 255) + if use_wcag: + txt_color = _ensure_wcag_contrast(txt_color, bg, 4.5) + + # 대략적 폰트 크기 추정(높이에 근사) + font_size = max(12, min(int(H * 0.85), 120)) + + rgba = self._render_text_rgba( + text=text, width=W, font_family_or_path=fpath or "NanumSquareRound", + font_size=font_size, color=txt_color, + line_height=line_height, letter_spacing=letter_spacing, + outline=outline, shadow=shadow + ) + out = _warp_rgba_to_polygon(out, rgba, polygon) + out = _apply_blend_mode(image, out, mode=blend_mode) + return out + + # ────────────────────── 신규: 마켓 프리셋 일괄 적용 ────────────────────── + def render_with_market_preset( + self, + image_bgr: np.ndarray, + ocr_results: List[Dict], + translated_texts: List[str], + market: str = "coupang", # 'coupang' | 'naver' + preset: str = "basic", # 'basic' | 'badge' | 'price' + font_number: Optional[int] = None, + ) -> np.ndarray: + presets = _create_market_presets() + key = f"{market}_{preset}" + if key not in presets: + key = "coupang_basic" if market == "coupang" else "naver_basic" + self.logger.log(f"[preset] {market}/{preset} 없음 → {key} 사용", level=logging.WARNING) + st = presets[key] + + out = image_bgr.copy() + fpath = self._get_font_path_by_number(font_number) or "NanumSquareRound" + + for r, txt in zip(ocr_results, translated_texts): + poly = r["polygon"] + x1, y1, x2, y2 = _poly_bbox(poly) + W, H = max(1, x2 - x1), max(1, y2 - y1) + + # 배경-대비 반영 + bg = _median_bg_rgb(out, x1, y1, x2, y2) + fg = _ensure_wcag_contrast(st["color"], bg, 4.5) + + font_size = min(max(12, int(H * 0.85)), st["max_font"]) + rgba = self._render_text_rgba( + text=txt, width=W, font_family_or_path=fpath, font_size=font_size, color=fg, + line_height=st["line_height"], letter_spacing=st["letter_spacing"], + rounded_bg=st["rounded_bg"], bg_color=st["bg_color"], padding=st["padding"], + outline=st["outline"], outline_color=(255, 255, 255), + shadow=st["shadow"], shadow_color=(0, 0, 0) + ) + out = _warp_rgba_to_polygon(out, rgba, poly) + out = _apply_blend_mode(image_bgr, out, mode=st["blend_mode"]) + return out