# worker/translator.py
from abc import ABC, abstractmethod
from typing import Iterable, List, Optional
import os
import re

SEP = "\n"  # sentence separator: a line break
MAX_CHARS = 4500  # max characters sent per request (stays under the web engine's URL/query limit)


# ===== Interface =====
class ITranslator(ABC):
    """Common interface implemented by every translation backend."""

    @abstractmethod
    def translate_batch(self, texts: Iterable[str], src: str, dest: str) -> List[str]:
        """Translate ``texts`` from ``src`` to ``dest``.

        Implementations return one translated string per input, in input order.
        """
        ...


# ===== Current default: deep-translator =====
class DeepTranslatorAdapter(ITranslator):
    """Translator backed by ``deep_translator.GoogleTranslator``.

    Several sentences are joined with ``SEP`` and sent as one request to cut
    down on round-trips. Whenever the joined result cannot be split back into
    the same number of pieces, each sentence is translated on its own instead.
    """

    def __init__(self):
        # Imported here rather than at module level, so deep_translator is only
        # required when this adapter is actually instantiated.
        from deep_translator import GoogleTranslator

        self.client = GoogleTranslator(source="zh-CN", target="ko")

    def _translate_one(self, text: str, src: str, dest: str) -> str:
        """Point the shared client at ``src``/``dest`` and translate ``text``."""
        self.client.source = src
        self.client.target = dest
        return self.client.translate(text)

    def translate_batch(self, texts: Iterable[str], src: str, dest: str) -> List[str]:
        """Translate ``texts`` in chunks of roughly ``MAX_CHARS`` characters.

        Args:
            texts: Sentences to translate; consumed once.
            src: Source language code passed to the client.
            dest: Target language code passed to the client.

        Returns:
            One string per input sentence, in input order. A sentence whose
            translation fails is returned untranslated.
        """
        texts = list(texts)
        print(f"translate_batch in: {texts}")
        if not texts:
            return []

        out: List[str] = []
        buf: List[str] = []
        total = 0
        for t in texts:
            add_len = len(t) + len(SEP)
            # Flush before the chunk would exceed the limit. The `buf` check
            # lets a single oversized sentence through as its own chunk.
            if total + add_len > MAX_CHARS and buf:
                out.extend(self._translate_joined(buf, src, dest))
                buf, total = [], 0
            buf.append(t)
            total += add_len
        if buf:
            out.extend(self._translate_joined(buf, src, dest))

        # Safety net: on a count mismatch, retranslate every sentence individually.
        if len(out) != len(texts):
            print(f"[WARN] translate_batch count mismatch: in={len(texts)} out={len(out)} -> fallback per-sentence")
            out = [self._safe_translate_item(t, src, dest) for t in texts]
        print(f"translate_batch out: {out}")
        return out

    def _safe_translate_item(self, t: str, src: str, dest: str) -> str:
        """Translate one sentence, returning it unchanged if translation fails."""
        try:
            translated = self._translate_one(t, src, dest)
        except Exception as e:
            # Best-effort by design: keep the original text, but leave a trace.
            print(f"[WARN] item translate error -> keeping original text: {e}")
            return t
        if not isinstance(translated, str):
            # The client handed back a non-string (e.g. None); keep the original.
            return t
        return self._normalize_spaces(translated)

    def _normalize_spaces(self, s: str) -> str:
        """Clean up whitespace in a translated string.

        Removes zero-width spaces (U+200B), collapses runs of spaces/tabs into
        a single space, drops whitespace on either side of a line break (which
        also collapses blank lines), and strips both ends.
        """
        s = s.replace("\u200b", "")
        s = re.sub(r"[ \t]+", " ", s)
        s = re.sub(r"\s+\n", "\n", s)
        s = re.sub(r"\n\s+", "\n", s)
        return s.strip()

    def _translate_joined(self, parts: List[str], src: str, dest: str) -> List[str]:
        """Translate ``parts`` as one ``SEP``-joined request.

        Returns exactly ``len(parts)`` strings. Falls back to translating each
        part separately when the request raises, when the client returns a
        non-string, or when the result splits into a different number of
        pieces than were sent.
        """
        joined_text = SEP.join(parts)
        try:
            tr = self._translate_one(joined_text, src, dest)
        except Exception as e:
            print(f"[WARN] batch translate error -> per-item fallback: {e}")
            return [self._safe_translate_item(p, src, dest) for p in parts]

        if not isinstance(tr, str):
            # Without this guard, tr.split() below would raise outside the try
            # block and abort the whole batch.
            print(f"[WARN] batch translate returned {type(tr).__name__} -> per-item fallback")
            return [self._safe_translate_item(p, src, dest) for p in parts]

        results = [self._normalize_spaces(s) for s in tr.split(SEP)]
        if len(results) != len(parts):
            print(f"[WARN] split size mismatch: expected={len(parts)} got={len(results)} -> per-item fallback")
            return [self._safe_translate_item(p, src, dest) for p in parts]
        return results


# ===== Azure =====
class AzureTranslatorAdapter(ITranslator):
    """Translator backed by the Azure Translator REST API (api-version 3.0).

    Configuration is read from the environment:
      * ``AZURE_TRANSLATOR_KEY`` (required; ``KeyError`` if missing)
      * ``AZURE_TRANSLATOR_REGION`` (optional, defaults to empty)
      * ``AZURE_TRANSLATOR_ENDPOINT`` (optional, defaults to the public endpoint)
    """

    def __init__(self):
        # Imported here rather than at module level, so requests is only
        # required when this adapter is actually instantiated.
        import requests

        self.session = requests.Session()
        self.key = os.environ["AZURE_TRANSLATOR_KEY"]
        self.region = os.environ.get("AZURE_TRANSLATOR_REGION", "")
        self.endpoint = os.environ.get(
            "AZURE_TRANSLATOR_ENDPOINT",
            "https://api.cognitive.microsofttranslator.com",
        )

    def translate_batch(self, texts: Iterable[str], src: str, dest: str) -> List[str]:
        """Translate ``texts`` in a single API call, retrying transient failures.

        Args:
            texts: Sentences to translate; consumed once.
            src: Source language code (sent as the ``from`` query parameter).
            dest: Target language code (sent as the ``to`` query parameter).

        Returns:
            One translated string per input sentence, in input order. An empty
            input yields ``[]`` without contacting the service.

        Raises:
            requests.HTTPError: The service answered with an error status that
                is not retryable, or still failed on the last attempt.
            requests.RequestException: A transport error (timeout, connection
                failure, ...) persisted through the last attempt.
        """
        items = list(texts)
        if not items:
            # Nothing to translate; skip the network round-trip entirely.
            return []

        import time

        import requests

        # The API accepts several sentences per call (better for cost and rate limits).
        # NOTE(review): no chunking is done here; very large batches may exceed
        # the service's per-request limits - confirm against the Azure docs.
        url = self.endpoint.rstrip("/") + "/translate"
        # Passed as params so the language codes are URL-encoded by requests.
        params = {"api-version": "3.0", "from": src, "to": dest}
        headers = {
            "Ocp-Apim-Subscription-Key": self.key,
            "Content-Type": "application/json",
        }
        if self.region:
            # Only sent when configured.
            # NOTE(review): Azure describes this header as optional for global
            # resources - confirm for the deployed resource type.
            headers["Ocp-Apim-Subscription-Region"] = self.region
        body = [{"text": t} for t in items]

        # Simple retry with exponential backoff (0.5s, 1s between attempts).
        max_attempts = 3
        for attempt in range(max_attempts):
            is_last = attempt == max_attempts - 1
            try:
                resp = self.session.post(
                    url, params=params, headers=headers, json=body, timeout=10
                )
            except requests.RequestException:
                if is_last:
                    raise
            else:
                if resp.ok:
                    return [entry["translations"][0]["text"] for entry in resp.json()]
                # Only throttling (429) and server errors are worth retrying;
                # other 4xx responses (bad key, bad language code) will not
                # change on a second try.
                retryable = resp.status_code == 429 or resp.status_code >= 500
                if is_last or not retryable:
                    resp.raise_for_status()  # raise on failure
            time.sleep(0.5 * (2 ** attempt))

        # Defensive: the loop above always returns or raises on its last pass.
        raise AssertionError("retry loop exited without a result")


_provider_singleton: Optional[ITranslator] = None


def get_translator() -> ITranslator:
    """Return the process-wide translator, creating it on first use.

    The backend is selected by the ``TRANSLATOR_PROVIDER`` environment variable
    (case-insensitive): ``"azure"`` selects ``AzureTranslatorAdapter``; any
    other value, or no value, selects ``DeepTranslatorAdapter``. The variable
    is read only when the instance is first created.
    """
    global _provider_singleton
    if _provider_singleton is None:
        provider = os.environ.get("TRANSLATOR_PROVIDER", "deep").lower()
        if provider == "azure":
            _provider_singleton = AzureTranslatorAdapter()
        else:
            _provider_singleton = DeepTranslatorAdapter()
    return _provider_singleton