# IMG_Worker/modules/test_openrouter_client.py

# -*- coding: utf-8 -*-
"""
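Manual test / benchmark script for the OpenRouter client.

Usage:
    python modules/test_openrouter_client.py

Environment variable setup:
    export OPENROUTER_API_KEY="sk-or-v1-xxxxx"
    or set the key directly in the code for a local run (never commit a real key)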
"""
import os
import sys
import time
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass

# Logging configuration: INFO level, "[timestamp] [LEVEL] message" format.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Put the parent of this file's directory at the front of the import path so
# the `modules` package imported below resolves when the script is run directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from modules.openrouter_client import OpenRouterTranslator


# =============================================================================
# Sample OCR data extracted from logs
# =============================================================================

# Sample 1: image 35 (7 texts, all Chinese).
# Each entry is a dict with the recognized 'text' and its OCR 'confidence'.
SAMPLE_1: List[Dict[str, Any]] = [
    {'text': '高密【拉毛布】', 'confidence': 0.8697245717048645},
    {'text': '柔中带韧不易坏', 'confidence': 0.9874316453933716},
    {'text': '安静无声助力深度睡眠', 'confidence': 0.9960897564888},
    {'text': 'newpet家的', 'confidence': 0.9988763928413391},
    {'text': '别人家的', 'confidence': 0.9997605085372925},
    {'text': '密织拉毛布保暖还结实', 'confidence': 0.9967950582504272},
    {'text': '劣质无纺布一拉就烂一洗就散', 'confidence': 0.9917935729026794},
]

# Sample 2: image 37 (3 texts, all Chinese).
# Each entry is a dict with the recognized 'text' and its OCR 'confidence'.
SAMPLE_2: List[Dict[str, Any]] = [
    {'text': '隐藏拉链不易咬', 'confidence': 0.9800551533699036},
    {'text': '宠物更安全', 'confidence': 0.9966630935668945},
    {'text': '隐藏式拉链+顺滑手感告别突元五金', 'confidence': 0.9670320749282837},
]

# Sample 3: image 38 (11 of the 12 texts contain Chinese; 'Productparameters'
# has been excluded). The unfiltered list is SAMPLE_3_WITH_ENGLISH.
SAMPLE_3: List[Dict[str, Any]] = [
    {'text': '产品参数', 'confidence': 0.995430052280426},
    {'text': '品牌', 'confidence': 0.9999157786369324},
    {'text': '名称', 'confidence': 0.9999277591705322},
    {'text': 'NewPet妞派特', 'confidence': 0.9976658225059509},
    {'text': '加厚防风圆弧房子', 'confidence': 0.9947051405906677},
    {'text': '适用对象', 'confidence': 0.9994360208511353},
    {'text': '产品材质', 'confidence': 0.9989043474197388},
    {'text': '猫犬通用', 'confidence': 0.9970399141311646},
    {'text': '高密度海绵、牛津布', 'confidence': 0.995124101638794},
    {'text': '颜色', 'confidence': 0.9999042749404907},
    {'text': '多色可选', 'confidence': 0.9995521903038025},
]

# Sample 4: image 4 (8 of the 9 texts contain Chinese; 'newpet' has been
# excluded). The unfiltered list is SAMPLE_4_WITH_ENGLISH.
SAMPLE_4: List[Dict[str, Any]] = [
    {'text': '工厂直销', 'confidence': 0.9989292621612549},
    {'text': '品质稳定有保证', 'confidence': 0.9961501359939575},
    {'text': '实力源头厂家，13年畅销好产品', 'confidence': 0.9883608222007751},
    {'text': 'M稳定货源', 'confidence': 0.8720976114273071},
    {'text': '诚实守信', 'confidence': 0.9912816882133484},
    {'text': 'M品质放心', 'confidence': 0.9013170003890991},
    {'text': '德国品质·13年畅销', 'confidence': 0.9565986394882202},
    {'text': '连续登顶天猫宠物窝TOP榜', 'confidence': 0.9966885447502136},
]

# Sample 5: image 1 (8 texts, all Chinese).
# Each entry is a dict with the recognized 'text' and its OCR 'confidence'.
SAMPLE_5: List[Dict[str, Any]] = [
    {'text': '二', 'confidence': 0.9579324722290039},
    {'text': '暖意不容等待', 'confidence': 0.9924479126930237},
    {'text': '趁"冷"下单，立享温暖', 'confidence': 0.9866428971290588},
    {'text': '降温预警', 'confidence': 0.9913228750228882},
    {'text': '广州', 'confidence': 0.9998736381530762},
    {'text': '低至10°℃-9℃', 'confidence': 0.9398331046104431},
    {'text': '部分城市', 'confidence': 0.9955496191978455},
    {'text': '未来7天降温情况', 'confidence': 0.9742034077644348},
]

# Original data before filtering (includes texts that contain no Chinese).
# Same entries as SAMPLE_3 plus the English-only 'Productparameters' item.
SAMPLE_3_WITH_ENGLISH: List[Dict[str, Any]] = [
    {'text': 'Productparameters', 'confidence': 0.9952481389045715},  # English (filtered out)
    {'text': '产品参数', 'confidence': 0.995430052280426},
    {'text': '品牌', 'confidence': 0.9999157786369324},
    {'text': '名称', 'confidence': 0.9999277591705322},
    {'text': 'NewPet妞派特', 'confidence': 0.9976658225059509},
    {'text': '加厚防风圆弧房子', 'confidence': 0.9947051405906677},
    {'text': '适用对象', 'confidence': 0.9994360208511353},
    {'text': '产品材质', 'confidence': 0.9989043474197388},
    {'text': '猫犬通用', 'confidence': 0.9970399141311646},
    {'text': '高密度海绵、牛津布', 'confidence': 0.995124101638794},
    {'text': '颜色', 'confidence': 0.9999042749404907},
    {'text': '多色可选', 'confidence': 0.9995521903038025},
]

# Unfiltered counterpart of SAMPLE_4: the same entries plus the English-only
# 'newpet' item.
SAMPLE_4_WITH_ENGLISH: List[Dict[str, Any]] = [
    {'text': '工厂直销', 'confidence': 0.9989292621612549},
    {'text': '品质稳定有保证', 'confidence': 0.9961501359939575},
    {'text': '实力源头厂家，13年畅销好产品', 'confidence': 0.9883608222007751},
    {'text': 'M稳定货源', 'confidence': 0.8720976114273071},
    {'text': '诚实守信', 'confidence': 0.9912816882133484},
    {'text': 'M品质放心', 'confidence': 0.9013170003890991},
    {'text': 'newpet', 'confidence': 0.9935563206672668},  # English (filtered out)
    {'text': '德国品质·13年畅销', 'confidence': 0.9565986394882202},
    {'text': '连续登顶天猫宠物窝TOP榜', 'confidence': 0.9966885447502136},
]


# =============================================================================
# Test functions
# =============================================================================

@dataclass
class BenchmarkResult:
    """Record of one model's benchmark run.

    Attributes:
        model_id: Identifier of the model that was benchmarked.
        elapsed_time: Time recorded for the run.
        success: Whether the run was recorded as successful.
        results: Translation output of the run; defaults to ``None``.
        error: Error message of the run; defaults to ``None``.
    """

    # Required fields (positional order is part of the constructor contract).
    model_id: str
    elapsed_time: float
    success: bool

    # Optional fields.
    results: Optional[List[str]] = None
    error: Optional[str] = None


def print_separator(title: str = ""):
    """Print a section divider, optionally with a title.

    Emits a blank line and an 80-character ``=`` rule. When ``title`` is
    truthy, the title (indented by two spaces) and a second rule follow.
    A trailing blank line is always printed.
    """
    rule = "=" * 80
    # The leading and trailing empty strings become the surrounding blank lines.
    pieces = ["", rule]
    if title:
        pieces.extend([f"  {title}", rule])
    pieces.append("")
    print("\n".join(pieces))


def test_translate_ocr_texts(
    translator: OpenRouterTranslator,
    ocr_results: List[Dict[str, Any]],
    product_name: str = "테스트 상품",
    category: str = "테스트 카테고리",
    sample_name: str = "샘플"
):
    """Exercise ``translator.translate_ocr_texts`` on one sample and print the outcome.

    Prints the input texts, the product context and the current model name,
    calls the translator, then prints each source text next to its translation.

    NOTE(review): despite the ``test_`` prefix this function takes required
    arguments, so pytest collection would presumably fail on missing
    fixtures — confirm this file is excluded from pytest runs.

    Args:
        translator: Client providing ``get_current_model()`` and
            ``translate_ocr_texts(...)``.
        ocr_results: OCR entries; the ``'text'`` value of each is printed and
            the list is passed to the translator unchanged.
        product_name: Product name forwarded to the translator.
        category: Category forwarded to the translator.
        sample_name: Label shown in the section header.

    Returns:
        The translator's return value, or ``None`` when an exception was
        raised inside the translate-and-print step (the traceback is printed).
    """
    print_separator(f"{sample_name} - translate_ocr_texts 테스트")

    print(f"입력 텍스트 ({len(ocr_results)}개):")
    for position, entry in enumerate(ocr_results, start=1):
        print(f"  {position}. {entry['text']}")

    print(f"\n상품명: {product_name}")
    print(f"카테고리: {category}")
    # Looked up only after the two lines above so output order is unchanged
    # if the lookup raises.
    model_name = translator.get_current_model()['name']
    print(f"모델: {model_name}")
    print("\n번역 중...")

    try:
        translations = translator.translate_ocr_texts(
            product_name=product_name,
            category=category,
            ocr_results=ocr_results
        )

        print(f"\n번역 결과 ({len(translations)}개):")
        paired = zip(ocr_results, translations)
        for position, (entry, translated) in enumerate(paired, start=1):
            print(f"  {position}. {entry['text']} → {translated}")
    except Exception as exc:
        print(f"\n❌ 오류 발생: {exc}")
        import traceback
        traceback.print_exc()
        return None
    else:
        return translations


def test_run_llm_translation(
    translator: OpenRouterTranslator,
    ocr_results: List[Dict[str, Any]],
    product_name: str = "테스트 상품",
    category: str = "테스트 카테고리",
    steps: int = 2,
    sample_name: str = "샘플"
):
    """Exercise ``translator.run_llm_translation`` (with ``steps``) and print the outcome.

    Prints the input texts, the product context, the current model name and
    the step setting, calls the translator, then prints each source text next
    to its translation.

    NOTE(review): despite the ``test_`` prefix this function takes required
    arguments, so pytest collection would presumably fail on missing
    fixtures — confirm this file is excluded from pytest runs.

    Args:
        translator: Client providing ``get_current_model()`` and
            ``run_llm_translation(...)``.
        ocr_results: OCR entries; the ``'text'`` value of each is printed and
            the list is passed to the translator unchanged.
        product_name: Product name forwarded to the translator.
        category: Category forwarded to the translator.
        steps: Forwarded to the translator; the printed label treats ``1`` as
            literal translation only and any other value as literal plus
            marketing-tone conversion.
        sample_name: Label shown in the section header.

    Returns:
        The translator's return value, or ``None`` when an exception was
        raised inside the translate-and-print step (the traceback is printed).
    """
    print_separator(f"{sample_name} - run_llm_translation 테스트 (steps={steps})")

    print(f"입력 텍스트 ({len(ocr_results)}개):")
    for position, entry in enumerate(ocr_results, start=1):
        print(f"  {position}. {entry['text']}")

    print(f"\n상품명: {product_name}")
    print(f"카테고리: {category}")
    # Looked up only after the two lines above so output order is unchanged
    # if the lookup raises.
    model_name = translator.get_current_model()['name']
    print(f"모델: {model_name}")
    if steps == 1:
        step_label = '직역만'
    else:
        step_label = '직역+마케팅톤 변환'
    print(f"번역 단계: {steps} ({step_label})")
    print("\n번역 중...")

    try:
        translations = translator.run_llm_translation(
            product_name=product_name,
            category=category,
            ocr_results=ocr_results,
            steps=steps
        )

        print(f"\n번역 결과 ({len(translations)}개):")
        paired = zip(ocr_results, translations)
        for position, (entry, translated) in enumerate(paired, start=1):
            print(f"  {position}. {entry['text']} → {translated}")
    except Exception as exc:
        print(f"\n❌ 오류 발생: {exc}")
        import traceback
        traceback.print_exc()
        return None
    else:
        return translations


def test_filtering_comparison(
    translator: OpenRouterTranslator,
    filtered_results: List[Dict[str, Any]],
    unfiltered_results: List[Dict[str, Any]],
    product_name: str = "테스트 상품",
    category: str = "테스트 카테고리"
):
    """Translate an unfiltered and a filtered sample and print both for comparison.

    First the unfiltered list is shown (each text flagged by whether it
    contains a character in U+4E00..U+9FFF) and translated, then the filtered
    list is shown and translated. Each translation step catches any exception,
    prints it with a traceback and continues. A fixed analysis/recommendation
    text is printed at the end. Nothing is returned.

    NOTE(review): despite the ``test_`` prefix this function takes required
    arguments, so pytest collection would presumably fail on missing
    fixtures — confirm this file is excluded from pytest runs.

    Args:
        translator: Client providing ``translate_ocr_texts(...)``.
        filtered_results: OCR entries after filtering.
        unfiltered_results: OCR entries before filtering.
        product_name: Product name forwarded to the translator.
        category: Category forwarded to the translator.
    """
    rule = "=" * 80

    def contains_chinese(text):
        # True when at least one character falls in the U+4E00..U+9FFF range.
        return any('\u4e00' <= ch <= '\u9fff' for ch in text)

    print_separator("필터링 전/후 비교 테스트")

    # --- Part 1: unfiltered input -------------------------------------------
    print(rule)
    print("  중국어가 없는 텍스트를 포함한 경우 (필터링 전)")
    print(rule)

    print(f"\n입력 텍스트 ({len(unfiltered_results)}개):")
    for position, entry in enumerate(unfiltered_results, start=1):
        if contains_chinese(entry['text']):
            flag = "✓"
        else:
            flag = "✗ (필터링됨)"
        print(f"  {position}. {entry['text']} {flag}")

    print("\n번역 중...")
    try:
        unfiltered_translations = translator.translate_ocr_texts(
            product_name=product_name,
            category=category,
            ocr_results=unfiltered_results
        )

        print("\n번역 결과:")
        paired = zip(unfiltered_results, unfiltered_translations)
        for position, (entry, translated) in enumerate(paired, start=1):
            # Texts without Chinese are annotated as "kept as original".
            suffix = "" if contains_chinese(entry['text']) else " (원본 유지)"
            print(f"  {position}. {entry['text']} → {translated}{suffix}")
    except Exception as exc:
        print(f"\n❌ 오류 발생: {exc}")
        import traceback
        traceback.print_exc()

    # --- Part 2: filtered input ---------------------------------------------
    print("\n" + rule)
    print("  중국어만 포함한 경우 (필터링 후)")
    print(rule)

    print(f"\n입력 텍스트 ({len(filtered_results)}개):")
    for position, entry in enumerate(filtered_results, start=1):
        print(f"  {position}. {entry['text']}")

    print("\n번역 중...")
    try:
        filtered_translations = translator.translate_ocr_texts(
            product_name=product_name,
            category=category,
            ocr_results=filtered_results
        )

        print("\n번역 결과:")
        paired = zip(filtered_results, filtered_translations)
        for position, (entry, translated) in enumerate(paired, start=1):
            print(f"  {position}. {entry['text']} → {translated}")
    except Exception as exc:
        print(f"\n❌ 오류 발생: {exc}")
        import traceback
        traceback.print_exc()

    # --- Part 3: fixed analysis text ----------------------------------------
    print("\n" + rule)
    print("  분석 및 권장사항")
    print(rule)
    analysis_text = """
    중국어가 없는 텍스트를 필터링하는 것에 대한 분석:

    1. 맥락 이해 측면:
       - 중국어가 없는 텍스트(영어, 숫자 등)는 보통 브랜드명, 모델명,
         또는 이미 번역된 텍스트일 가능성이 높음
       - 이러한 텍스트를 포함하면 LLM이 전체 맥락을 더 잘 이해할 수 있음
       - 예: "newpet" 브랜드명이 있으면 "newpet家的"의 번역 품질이 향상될 수 있음

    2. 토큰 사용 측면:
       - 필터링하지 않으면 입력 토큰이 증가함
       - 하지만 맥락 정보가 추가되어 번역 품질이 향상될 수 있음
       - 비용과 품질의 트레이드오프

    3. 권장사항:
       - 브랜드명, 모델명 등은 포함하는 것이 좋음 (맥락 이해 향상)
       - 순수 영어 설명문은 제외해도 무방 (중국어 번역 대상이 아님)
       - 숫자, 기호는 포함 (상품 정보의 일부)
       - 최종 결정은 번역 품질과 비용을 고려하여 선택
    """
    print(analysis_text)


def benchmark_models(
    api_key: str,
    model_ids: List[str],
    ocr_results: List[Dict[str, Any]],
    product_name: str = "테스트 상품",
    category: str = "테스트 카테고리",
    use_llm_translation: bool = False,
    steps: int = 1,
    timeout: int = 10
) -> List[BenchmarkResult]:
    """
    Benchmark several models on the same OCR sample and collect the outcomes.

    For every model ID a fresh ``OpenRouterTranslator`` is created and one
    translation call is made. A failure for one model is printed (with
    traceback) and recorded; the loop then continues with the next model.

    Args:
        api_key: OpenRouter API key.
        model_ids: Model IDs to test, in the order they should run.
        ocr_results: OCR result data passed to the translator.
        product_name: Product name passed to the translator.
        category: Category passed to the translator.
        use_llm_translation: If True use ``run_llm_translation``, otherwise
            ``translate_ocr_texts``.
        steps: Translation steps when ``run_llm_translation`` is used
            (1 = literal only, 2 = literal + marketing-tone conversion).
        timeout: Timeout value handed to ``OpenRouterTranslator`` (default 10,
            the value that used to be hard-coded here).

    Returns:
        One ``BenchmarkResult`` per model ID, in input order.
    """
    import traceback

    benchmark_results: List[BenchmarkResult] = []

    method_name = "run_llm_translation" if use_llm_translation else "translate_ocr_texts"
    print_separator("벤치마크 시작")
    print(f"테스트 모델 수: {len(model_ids)}")
    print(f"입력 텍스트 수: {len(ocr_results)}")
    print(f"상품명: {product_name}")
    print(f"카테고리: {category}")
    print(f"사용 메서드: {method_name}")
    if use_llm_translation:
        print(f"번역 단계: {steps} ({'직역만' if steps == 1 else '직역+마케팅톤 변환'})")
    print()

    for idx, model_id in enumerate(model_ids, 1):
        print(f"[{idx}/{len(model_ids)}] 모델: {model_id}")
        print("-" * 80)

        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        success = False
        results = None
        error_msg = None

        try:
            # Translator construction is deliberately inside the timed region,
            # as in the original measurement.
            translator = OpenRouterTranslator(
                api_key=api_key,
                model_id=model_id,
                timeout=timeout,
                logger=logger
            )

            # Pick the translation method according to the flag.
            # NOTE: a commented-out call to
            # translator.run_combined_llm_translation(product_name=...,
            # category=..., ocr_results=...) previously sat in this branch as
            # an alternative; it was dead code and has been removed.
            if use_llm_translation:
                results = translator.run_llm_translation(
                    product_name=product_name,
                    category=category,
                    ocr_results=ocr_results,
                    steps=steps
                )
            else:
                results = translator.translate_ocr_texts(
                    product_name=product_name,
                    category=category,
                    ocr_results=ocr_results
                )

            elapsed_time = time.perf_counter() - start_time

            # Pair inputs with outputs BEFORE declaring success: a
            # non-iterable return value (e.g. None) raises here and is then
            # recorded as a failure instead of a "success" carrying an error.
            pairs = list(zip(ocr_results, results))
            success = True

            print(f"✅ 성공 - 소요 시간: {elapsed_time:.2f}초")
            print("번역 결과:")
            for i, (original, translated) in enumerate(pairs, 1):
                print(f"  {i}. {original['text']} → {translated}")

        except Exception as e:
            # Keep the recorded state consistent even if the failure happened
            # after the translation call itself returned.
            success = False
            elapsed_time = time.perf_counter() - start_time
            error_msg = str(e)
            print(f"❌ 실패 - 소요 시간: {elapsed_time:.2f}초")
            print(f"오류: {error_msg}")
            traceback.print_exc()

        benchmark_results.append(BenchmarkResult(
            model_id=model_id,
            elapsed_time=elapsed_time,
            success=success,
            results=results,
            error=error_msg
        ))

        print()

    return benchmark_results


def print_benchmark_summary(benchmark_results: List[BenchmarkResult]):
    """Print a summary of benchmark results.

    Output consists of up to three parts:

    1. Successful models ranked by elapsed time, plus mean/min/max statistics.
    2. Failed models with their error message and elapsed time.
    3. When more than one model succeeded, an item-by-item comparison of each
       model's translations against the first successful model (input order).

    In part 3 a difference in the *number* of returned translations is
    reported explicitly. Previously ``zip`` silently truncated to the shorter
    list, so a model returning fewer (or more) items with a matching prefix
    was reported as identical to the baseline.

    Args:
        benchmark_results: Results as returned by ``benchmark_models``.
    """
    print_separator("벤치마크 결과 요약")

    # Split into successes and failures, preserving input order.
    successful_results = [r for r in benchmark_results if r.success]
    failed_results = [r for r in benchmark_results if not r.success]

    if successful_results:
        # Rank by elapsed time, fastest first.
        sorted_results = sorted(successful_results, key=lambda x: x.elapsed_time)

        print("✅ 성공한 모델:")
        print("-" * 80)
        print(f"{'순위':<6} {'모델 ID':<50} {'소요 시간':<15} {'상태'}")
        print("-" * 80)

        status = "✅ 성공"  # same for every row; only successes are listed
        for rank, result in enumerate(sorted_results, 1):
            print(f"{rank:<6} {result.model_id:<50} {result.elapsed_time:>10.2f}초  {status}")

        print()
        print("📊 통계:")
        times = [r.elapsed_time for r in successful_results]
        print(f"  평균 시간: {sum(times) / len(times):.2f}초")
        print(f"  최소 시간: {min(times):.2f}초 ({sorted_results[0].model_id})")
        print(f"  최대 시간: {max(times):.2f}초 ({sorted_results[-1].model_id})")
        print()

    if failed_results:
        print("❌ 실패한 모델:")
        print("-" * 80)
        for result in failed_results:
            print(f"  {result.model_id}")
            print(f"    오류: {result.error}")
            print(f"    소요 시간: {result.elapsed_time:.2f}초")
            print()

    # Compare translations between models.
    if len(successful_results) > 1:
        print_separator("번역 결과 비교")

        # The first successful model (in input order) is the baseline.
        base_result = successful_results[0]
        print(f"기준 모델: {base_result.model_id}")
        print()

        for result in successful_results[1:]:
            # Models with empty/missing results on either side are skipped.
            if result.results and base_result.results:
                print(f"모델: {result.model_id}")
                print("-" * 80)

                differences = []
                for i, (base_trans, comp_trans) in enumerate(zip(base_result.results, result.results), 1):
                    if base_trans != comp_trans:
                        differences.append(i)
                        print(f"  차이 {len(differences)}: 항목 {i}")
                        print(f"    기준: {base_trans}")
                        print(f"    비교: {comp_trans}")

                # zip() stops at the shorter list, so surface a count mismatch
                # instead of letting the tail go unreported.
                base_count = len(base_result.results)
                comp_count = len(result.results)
                count_mismatch = base_count != comp_count
                if count_mismatch:
                    print(f"  ⚠️ 결과 개수 불일치: 기준 {base_count}개, 비교 {comp_count}개")

                if differences:
                    print(f"  총 {len(differences)}개 항목에서 차이 발견")
                elif not count_mismatch:
                    print("  모든 번역 결과가 기준 모델과 동일합니다.")
                print()


def main():
    """Run the model benchmark with the settings below and print a summary.

    The OpenRouter API key is read from the ``OPENROUTER_API_KEY`` environment
    variable only; there is no in-source fallback.

    Raises:
        SystemExit: If ``OPENROUTER_API_KEY`` is unset or blank. The check
            runs first, before anything is printed or any request is made.
    """
    # SECURITY: a real-looking key used to be embedded here as the getenv()
    # fallback. Anything committed to source control must be treated as
    # leaked, so that key should be revoked/rotated. The key now has to come
    # from the environment.
    api_key = os.getenv("OPENROUTER_API_KEY", "").strip()
    if not api_key:
        sys.exit(
            "OPENROUTER_API_KEY 환경변수가 설정되지 않았습니다. "
            '예: export OPENROUTER_API_KEY="sk-or-v1-xxxxx"'
        )

    print_separator("OpenRouter 클라이언트 벤치마크 테스트")

    # =============================================================================
    # Configure the test data and the models to benchmark here
    # =============================================================================

    # Test data
    product_name = "NewPet 반려동물 House"
    category = "반려동물용품"
    test_sample = SAMPLE_1  # or SAMPLE_2, SAMPLE_3, ...

    # Benchmark settings
    USE_LLM_TRANSLATION = True  # True: run_llm_translation, False: translate_ocr_texts
    STEPS = 2  # steps for run_llm_translation (1 = literal only, 2 = literal + marketing tone)

    # Model IDs to benchmark; uncomment entries to include them in the run.
    MODEL_IDS = [
        # "xiaomi/mimo-v2-flash:free",
        # "openai/gpt-oss-20b:deepinfra/fp4",
        # "mistralai/devstral-2512:free",
        # "mistralai/mistral-7b-instruct:free",
        # "openai/gpt-oss-20b:gmicloud/fp4",
        # "z-ai/glm-4.5-air:novita/bf16",
        # "deepseek/deepseek-v3.2:atlas-cloud/fp8",
        "openai/gpt-5-nano:azure",
        # "google/gemma-3n-e4b-it:together",
        # "google/gemma-3-4b-it:deepinfra/bf16",
        # add further model IDs here
    ]

    # Run the benchmark
    benchmark_results = benchmark_models(
        api_key=api_key,
        model_ids=MODEL_IDS,
        ocr_results=test_sample,
        product_name=product_name,
        category=category,
        use_llm_translation=USE_LLM_TRANSLATION,
        steps=STEPS
    )

    # Print the summary
    print_benchmark_summary(benchmark_results)

    print_separator("벤치마크 완료")


# Run the benchmark only when this file is executed as a script (not on import).
if __name__ == "__main__":
    main()