# -*- coding: utf-8 -*-
"""
Text rendering module.

Draws translated text onto an inpainted image at the locations reported by
OCR (library version).
"""

import math
import os
from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont


class TextRenderingModule:
    """Render translated strings into the regions described by OCR polygons.

    Images handed to the public methods are OpenCV-style arrays; they are
    converted with ``cv2.COLOR_BGR2RGB`` before drawing with Pillow and
    converted back afterwards. Colour tuples passed to the drawing calls are
    therefore interpreted by Pillow as RGB.
    """

    def __init__(self, font_path: Optional[str] = None):
        """Set up font lookup and the font cache.

        Args:
            font_path: TrueType font to use by default. When omitted, the
                ``HakgyoansimDunggeunmisoTTFB.ttf`` file next to this module
                is used.
        """
        self.font_path = font_path or os.path.join(
            os.path.dirname(__file__), 'HakgyoansimDunggeunmisoTTFB.ttf'
        )
        self.default_font_size = 20
        # Loaded fonts keyed by "<path>_<size>".
        self.font_cache = {}
        self._setup_default_fonts()
        print("텍스트 렌더링 모듈 초기화 완료")
        # Report the font that will really be used, which may be a fallback.
        print(f"기본 폰트: {self.default_font_path}")

    def _setup_default_fonts(self):
        """Choose ``self.default_font_path`` and record available system fonts.

        The configured ``self.font_path`` wins whenever the file exists, so a
        font passed to the constructor is actually honoured. System fonts are
        only a fallback for when that file is missing.
        """
        possible_fonts = [
            "/usr/share/fonts/truetype/nanum/NanumGothic.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "/System/Library/Fonts/AppleGothic.ttf",
            "C:/Windows/Fonts/malgun.ttf",
            "C:/Windows/Fonts/gulim.ttc",
        ]
        self.available_fonts = [f for f in possible_fonts if os.path.exists(f)]

        if self.font_path and os.path.exists(self.font_path):
            self.default_font_path = self.font_path
        elif self.available_fonts:
            self.default_font_path = self.available_fonts[0]
        else:
            # Nothing usable on disk; get_font() falls back to Pillow's
            # built-in font when this path does not exist.
            self.default_font_path = self.font_path

    def get_font(self, size: int,
                 font_path: Optional[str] = None) -> ImageFont.FreeTypeFont:
        """Return a cached font object for ``font_path`` at ``size``.

        Args:
            size: Font size passed to ``ImageFont.truetype``.
            font_path: Font file; defaults to ``self.default_font_path``.

        Returns:
            The loaded font, or Pillow's built-in default font when the file
            is missing or cannot be loaded.
        """
        font_path = font_path or self.default_font_path
        cache_key = f"{font_path}_{size}"

        if cache_key not in self.font_cache:
            try:
                if font_path and os.path.exists(font_path):
                    font = ImageFont.truetype(font_path, size)
                else:
                    font = ImageFont.load_default()
            except Exception as e:
                # Deliberately broad: rendering is best-effort, so any font
                # loading failure degrades to the built-in font.
                print(f"폰트 로드 오류: {e}")
                font = ImageFont.load_default()
            self.font_cache[cache_key] = font

        return self.font_cache[cache_key]

    def _text_bbox(self, text: str, font_size: int,
                   font_path: Optional[str] = None) -> Tuple[int, int, int, int]:
        """Return ``(left, top, right, bottom)`` of ``text`` relative to the draw origin.

        Falls back to ``(0, 0, width, height)`` from ``font.getsize`` on
        Pillow versions whose fonts have no ``getbbox``.
        """
        font = self.get_font(font_size, font_path)
        try:
            left, top, right, bottom = font.getbbox(text)
        except AttributeError:
            width, height = font.getsize(text)
            return 0, 0, int(width), int(height)
        return int(left), int(top), int(right), int(bottom)

    def estimate_text_size(self, text: str, font_size: int,
                           font_path: Optional[str] = None) -> Tuple[int, int]:
        """Return the ``(width, height)`` of ``text`` at ``font_size``."""
        left, top, right, bottom = self._text_bbox(text, font_size, font_path)
        return right - left, bottom - top

    def calculate_optimal_font_size(self, text: str, target_width: int,
                                    target_height: int, min_size: int = 8,
                                    max_size: int = 100,
                                    font_path: Optional[str] = None) -> int:
        """Find the largest font size whose text fits the target box.

        Sizes are tried in increasing order from ``min_size`` and the search
        stops at the first size that does not fit.

        Returns:
            The largest fitting size, or ``min_size`` when nothing fits.
        """
        best_size = min_size
        for size in range(min_size, max_size + 1):
            width, height = self.estimate_text_size(text, size, font_path)
            if width > target_width or height > target_height:
                break
            best_size = size
        return best_size

    def _estimate_background_color(self, image: np.ndarray, x1: int, y1: int,
                                   x2: int, y2: int) -> Tuple[int, int, int]:
        """Return the mean colour around a box, with channels 2, 1, 0 swapped to front.

        The box is grown by a 5 pixel margin and clipped to the image. The
        channel reversal matches the BGR-to-RGB conversion used elsewhere in
        this class. When the clipped region is empty (box outside the image
        or inverted), white is returned instead of failing on a NaN mean.
        """
        margin = 5
        img_h, img_w = image.shape[0], image.shape[1]
        y_lo = max(0, y1 - margin)
        y_hi = min(img_h, y2 + margin)
        x_lo = max(0, x1 - margin)
        x_hi = min(img_w, x2 + margin)

        if y_hi <= y_lo or x_hi <= x_lo:
            return (255, 255, 255)

        region = image[y_lo:y_hi, x_lo:x_hi]
        mean_color = np.mean(region, axis=(0, 1))
        return (int(mean_color[2]), int(mean_color[1]), int(mean_color[0]))

    def _get_contrasting_color(self, bg_color: Tuple[int, int, int]) -> Tuple[int, int, int]:
        """Return black for bright backgrounds and white for dark ones.

        Brightness is the 0.299/0.587/0.114 weighted sum of the three
        components, compared against 128.
        """
        brightness = (bg_color[0] * 0.299
                      + bg_color[1] * 0.587
                      + bg_color[2] * 0.114)
        return (0, 0, 0) if brightness > 128 else (255, 255, 255)

    def render_text(self, image: np.ndarray, ocr_results: List[Dict],
                    translated_texts: List[str],
                    font_path: Optional[str] = None) -> np.ndarray:
        """Draw each translated string centred in its OCR polygon.

        Args:
            image: Source image; it is copied, not modified.
            ocr_results: Dicts with a ``'polygon'`` entry holding a sequence
                of ``[x, y]`` points.
            translated_texts: Strings paired with ``ocr_results`` by position.
            font_path: Optional font file overriding the default font.

        Returns:
            A new image with the text drawn on it.
        """
        result_image = image.copy()

        for ocr_result, translated_text in zip(ocr_results, translated_texts):
            if not translated_text:
                # Nothing to draw for an empty translation.
                continue

            polygon_array = np.asarray(ocr_result['polygon'])
            if polygon_array.ndim != 2 or polygon_array.shape[0] == 0:
                # No usable points to place the text with.
                continue

            x_coords = polygon_array[:, 0]
            y_coords = polygon_array[:, 1]
            x_min, x_max = int(np.min(x_coords)), int(np.max(x_coords))
            y_min, y_max = int(np.min(y_coords)), int(np.max(y_coords))

            optimal_font_size = self.calculate_optimal_font_size(
                translated_text, x_max - x_min, y_max - y_min,
                font_path=font_path,
            )
            left, top, right, bottom = self._text_bbox(
                translated_text, optimal_font_size, font_path
            )
            text_width = right - left
            text_height = bottom - top

            center_x = (x_min + x_max) // 2
            center_y = (y_min + y_max) // 2
            # Pillow starts drawing at the anchor origin, while the glyphs
            # begin (left, top) pixels further in; subtracting that offset
            # centres the visible text rather than the origin.
            text_x = center_x - text_width // 2 - left
            text_y = center_y - text_height // 2 - top

            angle = 0
            if len(polygon_array) >= 2:
                dx = polygon_array[1][0] - polygon_array[0][0]
                dy = polygon_array[1][1] - polygon_array[0][1]
                # Image y grows downwards, so atan2 is positive for a
                # clockwise tilt; Pillow rotates counter-clockwise, hence
                # the negation.
                angle = -math.degrees(math.atan2(dy, dx))

            bg_color = self._estimate_background_color(
                image, x_min, y_min, x_max, y_max
            )
            text_color = self._get_contrasting_color(bg_color)

            result_image = self.render_text_on_image(
                result_image,
                translated_text,
                (text_x, text_y),
                font_size=optimal_font_size,
                font_path=font_path,
                text_color=text_color,
                background_color=None,
                angle=angle,
            )

        return result_image

    def render_text_on_image(self, image: np.ndarray, text: str,
                             position: Tuple[int, int],
                             font_size: Optional[int] = None,
                             font_path: Optional[str] = None,
                             text_color: Tuple[int, int, int] = (0, 0, 0),
                             background_color: Optional[Tuple[int, int, int]] = None,
                             angle: float = 0) -> np.ndarray:
        """Draw one string on an image and return the result.

        Args:
            image: Image converted with ``cv2.COLOR_BGR2RGB`` before drawing.
            text: String to draw.
            position: Drawing origin ``(x, y)`` handed to Pillow.
            font_size: Font size; defaults to ``self.default_font_size``.
            font_path: Optional font file overriding the default font.
            text_color: Fill colour for the text.
            background_color: When given, a filled rectangle with a 2 pixel
                margin is drawn behind the text.
            angle: Rotation in degrees, counter-clockwise as in
                ``PIL.Image.Image.rotate``. The text is rotated about the
                centre of its own bounding box.

        Returns:
            A new array converted back with ``cv2.COLOR_RGB2BGR``.
        """
        if font_size is None:
            font_size = self.default_font_size

        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(pil_image)
        font = self.get_font(font_size, font_path)

        left, top, right, bottom = self._text_bbox(text, font_size, font_path)
        text_width = right - left
        text_height = bottom - top
        # Top-left corner of the visible glyphs on the target image.
        ink_x = position[0] + left
        ink_y = position[1] + top

        if background_color is not None:
            draw.rectangle(
                [ink_x - 2, ink_y - 2,
                 ink_x + text_width + 2, ink_y + text_height + 2],
                fill=background_color,
            )

        if angle != 0:
            pad = 5
            rgb = tuple(text_color)
            # Transparent layer tinted with the text colour so that
            # interpolated edge pixels do not pick up a white fringe.
            layer = Image.new(
                'RGBA',
                (text_width + 2 * pad, text_height + 2 * pad),
                rgb + (0,),
            )
            # Offset by the bbox so the glyphs sit exactly inside the padded
            # canvas instead of being clipped at the bottom/right.
            ImageDraw.Draw(layer).text(
                (pad - left, pad - top), text, font=font, fill=rgb + (255,)
            )
            rotated = layer.rotate(angle, resample=Image.BICUBIC, expand=True)

            # expand=True grows the canvas around its centre, so align the
            # rotated centre with the centre of the intended text box.
            center_x = ink_x + text_width / 2
            center_y = ink_y + text_height / 2
            paste_at = (
                int(round(center_x - rotated.width / 2)),
                int(round(center_y - rotated.height / 2)),
            )
            pil_image.paste(rotated, paste_at, rotated)
        else:
            draw.text(position, text, font=font, fill=text_color)

        return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

    def create_text_styles(self) -> Dict[str, Dict[str, Any]]:
        """Return the named text style definitions."""
        styles = {
            'default': {
                'color': (0, 0, 0),
                'bg_color': None,
                'outline': True,
                'outline_color': (255, 255, 255),
                'outline_width': 1,
            },
            'bold': {
                'color': (0, 0, 0),
                'bg_color': (255, 255, 255),
                'outline': True,
                'outline_color': (128, 128, 128),
                'outline_width': 2,
            },
            'highlight': {
                'color': (255, 255, 255),
                'bg_color': (255, 0, 0),
                'outline': False,
                'outline_color': None,
                'outline_width': 0,
            },
            'subtle': {
                'color': (128, 128, 128),
                'bg_color': None,
                'outline': True,
                'outline_color': (255, 255, 255),
                'outline_width': 1,
            },
        }
        return styles

    def render_with_style(self, image: np.ndarray, ocr_results: List[Dict],
                          translated_texts: List[str],
                          style_name: str = 'default') -> np.ndarray:
        """Render text for a named style.

        NOTE: style attributes are not applied yet; every style currently
        produces the same output as ``render_text``. Only the style name is
        validated, with a message printed for unknown names.
        """
        styles = self.create_text_styles()

        if style_name not in styles:
            print(f"알 수 없는 스타일: {style_name}")
            style_name = 'default'

        # Plain rendering; style-specific effects (shadow, glow, ...) are
        # still to be implemented here.
        return self.render_text(image, ocr_results, translated_texts)

    def adjust_text_for_space(self, text: str, max_width: int,
                              max_height: int,
                              font_size: int) -> Tuple[str, int]:
        """Wrap or shorten text and shrink the font size to fit a box.

        Text longer than 20 characters is split into two lines at the middle
        word, or cut to 15 characters plus ``'...'`` when it is one word.
        The font size is then lowered in steps of 2, never below 8, until a
        rough estimate (0.6 x size per character of the longest line, one
        size unit of height per line) fits the box.

        Args:
            text (str): Original text.
            max_width (int): Maximum width.
            max_height (int): Maximum height.
            font_size (int): Starting font size.

        Returns:
            Tuple[str, int]: Adjusted text and font size.
        """
        if len(text) > 20:
            words = text.split(' ')
            if len(words) > 1:
                mid = len(words) // 2
                text = ' '.join(words[:mid]) + '\n' + ' '.join(words[mid:])
            else:
                text = text[:15] + '...'

        # Width is governed by the longest line, not the whole string.
        lines = text.split('\n')
        longest_line = max(len(line) for line in lines)

        min_font_size = 8
        adjusted_font_size = font_size
        while adjusted_font_size > min_font_size:
            estimated_width = longest_line * adjusted_font_size * 0.6
            estimated_height = len(lines) * adjusted_font_size
            if estimated_width <= max_width and estimated_height <= max_height:
                break
            # Clamp so odd starting sizes cannot step past the minimum.
            adjusted_font_size = max(min_font_size, adjusted_font_size - 2)

        return text, adjusted_font_size

    def test_module(self):
        """Exercise the module and write sample images to ``test_output``."""
        print("텍스트 렌더링 모듈 테스트 시작...")

        os.makedirs("test_output", exist_ok=True)

        # Light background with a vertical gradient.
        test_image = np.ones((400, 600, 3), dtype=np.uint8) * 240
        for y in range(400):
            color_value = int(240 - (y / 400) * 40)
            # Keep every channel inside 0..255.
            r = min(255, max(0, color_value))
            g = min(255, max(0, color_value + 10))
            b = min(255, max(0, color_value + 20))
            test_image[y, :] = [r, g, b]

        # Synthetic OCR results.
        test_ocr_results = [
            {
                'text': '中文测试1',
                'confidence': 0.95,
                'polygon': [[50, 50], [200, 50], [200, 90], [50, 90]],
                'bbox': (50, 50, 150, 40),
            },
            {
                'text': '价格优惠',
                'confidence': 0.88,
                'polygon': [[300, 100], [500, 100], [500, 140], [300, 140]],
                'bbox': (300, 100, 200, 40),
            },
            {
                'text': '限时抢购',
                'confidence': 0.92,
                'polygon': [[100, 200], [250, 200], [250, 240], [100, 240]],
                'bbox': (100, 200, 150, 40),
            },
            {
                'text': '新品上市',
                'confidence': 0.89,
                'polygon': [[350, 300], [550, 300], [550, 340], [350, 340]],
                'bbox': (350, 300, 200, 40),
            },
        ]

        # Translations paired with the OCR results above.
        translated_texts = [
            "한국어 테스트1",
            "가격 할인",
            "한정시간 특가",
            "신상품 출시",
        ]

        print("기본 렌더링 테스트...")
        rendered_image = self.render_text(
            test_image, test_ocr_results, translated_texts
        )
        cv2.imwrite("test_output/text_render_basic.jpg", rendered_image)

        # Render each style once; the images are reused for the comparison.
        styles = ['default', 'bold', 'highlight', 'subtle']
        styled_images = []
        for style in styles:
            print(f"{style} 스타일 테스트...")
            styled_image = self.render_with_style(
                test_image, test_ocr_results, translated_texts, style
            )
            cv2.imwrite(f"test_output/text_render_{style}.jpg", styled_image)
            styled_images.append(styled_image)

        print("텍스트 조정 테스트...")
        long_text = "이것은 매우 긴 텍스트입니다 공간에 맞게 조정됩니다"
        adjusted_text, adjusted_size = self.adjust_text_for_space(
            long_text, 150, 40, 20
        )
        print(f"원본: {long_text}")
        print(f"조정됨: {adjusted_text}, 크기: {adjusted_size}")

        # Original first, then one image per style.
        comparison_images = [test_image] + styled_images
        self._create_style_comparison(comparison_images, styles)

        print("텍스트 렌더링 모듈 테스트 완료!")

    def _create_style_comparison(self, images: List[np.ndarray],
                                 style_names: List[str]):
        """Write a side-by-side strip of the original and styled images.

        Args:
            images: The original image followed by one image per style.
            style_names: Labels for ``images[1:]``, in the same order.
        """
        if not images:
            return

        # Scale everything to a 200 pixel wide thumbnail, keeping the aspect
        # ratio of the first image.
        target_width = 200
        target_height = int(
            images[0].shape[0] * target_width / images[0].shape[1]
        )
        resized_images = [
            cv2.resize(img, (target_width, target_height)) for img in images
        ]

        # White canvas with a 30 pixel label strip above the thumbnails.
        label_height = 30
        comparison = np.ones(
            (target_height + label_height,
             target_width * len(resized_images), 3),
            dtype=np.uint8,
        ) * 255

        comparison[label_height:label_height + target_height,
                   0:target_width] = resized_images[0]
        cv2.putText(comparison, "Original", (10, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

        for i, (img, style_name) in enumerate(
                zip(resized_images[1:], style_names)):
            x_offset = target_width * (i + 1)
            comparison[label_height:label_height + target_height,
                       x_offset:x_offset + target_width] = img
            cv2.putText(comparison, style_name, (x_offset + 10, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

        # Make sure the output directory exists when called on its own.
        os.makedirs("test_output", exist_ok=True)
        cv2.imwrite("test_output/text_style_comparison.jpg", comparison)
        print("스타일 비교 이미지 저장 완료")