# AutoPercenty3/titleManager/gpt_test.py

import logging
from openai import OpenAI
import json
import re
import logging
import requests

class GPTClient:
    """Prompt helper that posts queries to an HTTP GPT endpoint and parses JSON replies.

    Every public helper builds a prompt, sends it through :meth:`ask`, and
    validates the shape of the parsed reply before handing it to the caller.

    The injected ``logger`` is expected to expose
    ``log(message, level=..., exc_info=...)``. When ``logger`` is ``None`` the
    standard :mod:`logging` module is used instead, so the client stays usable
    in ad-hoc scripts.
    """

    # HTTP endpoint the prompts are posted to when the caller gives none.
    DEFAULT_ENDPOINT = "http://146.56.101.199:8900/chatgpt"
    # Seconds to wait for the endpoint; without a timeout requests blocks forever.
    DEFAULT_TIMEOUT = 120

    # Punctuation that clean_special_chars keeps untouched.
    allowed_chars = "!$~()._-=+/"
    # Characters that clean_special_chars rewrites instead of dropping.
    replacements = {
        "*": "X",
        "【": "(",
        "】": ")",
        "[": "(",
        "]": ")",
        ",": ".",
    }

    def __init__(self, logger, api_key: str = "", model="gpt-4o-mini", temperature=0.0,
                 endpoint: str = DEFAULT_ENDPOINT, timeout: float = DEFAULT_TIMEOUT):
        """Store the configuration and create the OpenAI client.

        Args:
            logger: Object exposing ``log(message, level=..., exc_info=...)``,
                or ``None`` to fall back to the standard logging module.
            api_key: Key handed to the ``OpenAI`` client.
            model: Model name kept on the instance.
            temperature: Sampling temperature kept on the instance.
            endpoint: URL that :meth:`ask` posts prompts to.
            timeout: Seconds :meth:`ask` waits for the endpoint to answer.
        """
        self.logger = logger
        self.client = OpenAI(api_key=api_key)
        self.model = model
        self.temperature = temperature
        self.endpoint = endpoint
        self.timeout = timeout

    def _log(self, message: str, level: int = logging.DEBUG, exc_info: bool = False) -> None:
        """Route a message to the injected logger, or to stdlib logging when there is none."""
        if self.logger is None:
            logging.getLogger(__name__).log(level, message, exc_info=exc_info)
            return
        # Only forward exc_info when requested so the call matches the
        # two-argument form used for ordinary messages.
        if exc_info:
            self.logger.log(message, level=level, exc_info=True)
        else:
            self.logger.log(message, level=level)

    def ask(self, prompt: str) -> dict:
        """Post ``prompt`` to the endpoint and return the parsed JSON reply.

        Args:
            prompt: Text sent as the ``query`` field of the JSON payload.

        Returns:
            The decoded JSON body (normally a dict). An empty dict is returned
            when the request fails, times out, or the body is not valid JSON.
        """
        try:
            response = requests.post(
                self.endpoint,
                json={"query": prompt},
                headers={"Content-Type": "application/json"},
                timeout=self.timeout,
            )
            response.raise_for_status()  # turn HTTP error statuses into exceptions

            # Trust the declared content type first; otherwise try to parse
            # the raw text as JSON ourselves.
            if 'application/json' in response.headers.get('Content-Type', ''):
                return response.json()
            return json.loads(response.text)
        except Exception as e:
            # Deliberate best-effort boundary: callers rely on getting {}
            # back instead of an exception.
            self._log(f'GPT 통신 오류: {e}', level=logging.ERROR, exc_info=True)
            return {}

    def is_related_product(self, original_name: str, keyword_name: str) -> bool:
        """Ask whether two product names belong to the same category.

        Returns:
            The ``related`` value of the reply, or ``False`` when the reply is
            not a JSON object or lacks that key.
        """
        prompt = (
            f"Are the products '{original_name}' and '{keyword_name}' from the same category? "
            "Respond strictly in JSON format: {\"related\": true} or {\"related\": false}."
        )
        result = self.ask(prompt)
        if not isinstance(result, dict):
            return False
        return result.get("related", False)

    def generate_product_name_next(self, words: list, original_name: str, top_titles: list, category, max_length=40) -> str:
        """Generate a Korean product name from the original name and sold titles.

        Two prompts are sent: a keyword classification request followed by
        the product-name request.

        Args:
            words: Candidate keywords; punctuation is stripped before use.
            original_name: Original product name to rewrite.
            top_titles: Titles of sold products, used as style reference.
            category: Product category inserted into the prompt.
            max_length: Lower bound of the requested length; the upper bound
                is 30% above it.

        Returns:
            The stripped ``product_name`` of the reply, or ``""`` when either
            reply does not have the expected shape.
        """
        # Strip punctuation so only word characters and whitespace remain.
        clean_words = [re.sub(r'[^\w\s]', '', word) for word in words]

        # 1. Build the keyword classification prompt.
        classify_prompt = (
            "너는 키워드 분류 전문가야. 주어진 키워드와 참고 상품명을 분석하여 키워드를 대형, 중형, 소형으로 분류해줘.\n\n"
            "### 분류 기준:\n"
            "1. **대형 키워드**: 상품의 범주나 카테고리를 나타내며, 범용적이고 일반적인 키워드.\n"
            "   - 예: '가방', '냉풍기', '여자구두', '책상'\n"
            "2. **중형 키워드**: 대형 키워드에 추가적인 속성이 붙은 키워드로, 범위가 좁아짐.\n"
            "   - 예: '스웨이드 가방', '산업용냉풍기', '여자앵클부츠', '접이식책상'\n"
            "3. **소형 키워드**: 상품의 구체적이고 특징적인 키워드로, 가장 세부적인 정보.\n"
            "   - 예: '스웨이드 크로스 가방', '산업용코끼리냉풍기', '키작은여자앵클부츠', '접이식 강화유리 책상'\n\n"
            "### 입력 데이터:\n"
            f"- 키워드 리스트: {clean_words}\n"
            f"- 팔린 상품명 리스트: {top_titles}\n\n"
            "### 출력 형식:\n"
            "JSON 형식으로 반환해줘:\n"
            "{\n"
            "  \"large_keywords\": [\"대형 키워드 리스트\"],\n"
            "  \"medium_keywords\": [\"중형 키워드 리스트\"],\n"
            "  \"small_keywords\": [\"소형 키워드 리스트\"]\n"
            "}\n"
        )

        # NOTE(review): the classified keywords are not inserted into the
        # naming prompt below; only the shape of the reply is checked here.
        # Confirm whether this round-trip is still wanted.
        classify_response = self.ask(classify_prompt)
        if not isinstance(classify_response, dict):
            self._log(
                'Error parsing keyword classification response: '
                f'expected a JSON object, got {type(classify_response).__name__}',
                level=logging.ERROR,
            )
            return ""

        # Integer arithmetic keeps the limit readable ("52자", not "52.0자").
        upper_length = int(max_length + max_length * 3 // 10)

        # 2. Build the product-name prompt.
        product_prompt = (
            "너는 상품명 편집 전문가야. 주어진 중국 원본 상품명을 한국에서 잘 팔릴 수 있는 상품명으로 수정해야 해.\n\n"
            "### 작업 규칙:\n"
            # NOTE: the rules about mixing large/medium/small keywords and
            # the product-code rule are currently disabled.
            "3. 키워드 배치는 자연스럽고, 고객이 검색할 때 쉽게 매칭되도록 작성.\n\n"
            "### 입력 데이터:\n"
            f"- 원본 상품명: {original_name}\n"
            f"- 팔린 상품명 리스트: {top_titles}\n"
            f"- 상품의 카테고리: {category}\n"
            f"- 상품명 길이제한: 공백 포함 {max_length}자 ~ {upper_length}자 이내\n\n"
            "### 출력 형식:\n"
            "JSON 형식으로 결과를 반환해줘:\n"
            "{ \"product_name\": \"수정된 상품명\" }\n"
        )

        product_response = self.ask(product_prompt)
        product_name = product_response.get("product_name", "") if isinstance(product_response, dict) else None
        if not isinstance(product_name, str):
            self._log(
                f'Error parsing product name from GPT response: {product_response}',
                level=logging.ERROR,
            )
            return ""
        return product_name.strip()

    def extract_proper_nouns(self, words: list, category: str) -> dict:
        """Split words into trademark-eligible and ineligible groups for a category.

        Args:
            words: Words to evaluate.
            category: Product category used as context in the prompt.

        Returns:
            The parsed reply, always containing the ``eligible`` and
            ``ineligible`` keys (empty lists when the reply omits them or is
            not a JSON object).
        """
        prompt = (
            f"Analyze the following list of words in the context of the '{category}' category: {words}. "
            "Determine if each word could potentially be eligible for trademark registration in this category based on these criteria:\n\n"
            "1. Words suitable for trademark registration:\n"
            "   - Unique identifiers or names that are not commonly used.\n"
            "   - Creative or distinctive words that are not descriptive of product features or functionality.\n\n"
            "2. Words not suitable for trademark registration:\n"
            "   - Generic or common terms that describe product features (e.g., 'Portable').\n"
            "   - Industry-standard terms (e.g., 'AirCooler').\n"
            "   - Common adjectives or technical terms (e.g., 'EcoFriendly').\n"
            "   - Words that are overly broad or generic.\n\n"
            "Respond strictly in JSON format with two categories: "
            # The example uses double quotes because single quotes are not valid JSON.
            '{"eligible": ["word1", "word2"], "ineligible": ["word3", "word4"]}.'
        )
        response = self.ask(prompt)
        if isinstance(response, dict):
            # ask() yields {} on failure; fill in the documented keys so
            # callers can index the result safely.
            return {"eligible": [], "ineligible": [], **response}

        self._log(f'응답이 JSON 형식이 아닙니다: {response}', level=logging.ERROR)
        return {"eligible": [], "ineligible": []}

    def generate_promotion_text(self, recognized_texts: list, product_name: str, max_length=100) -> str:
        """Generate a Korean promotional message from texts recognized on the product image.

        Args:
            recognized_texts: Texts recognized from the product image.
            product_name: Name of the product.
            max_length: Maximum length requested for the message.

        Returns:
            The stripped message.

        Raises:
            ValueError: If the reply is not a string. ask() returns ``{}`` on
                any failure, so a failed request ends up here as well.
        """
        prompt = (
            f"The following texts were recognized from the product image: {recognized_texts}.\n\n"
            f"The product name is '{product_name}'.\n\n"
            "### Task:\n"
            "1. Create a compelling and concise promotional message in Korean based on the recognized texts and the product name.\n"
            f"2. The message must not exceed {max_length} characters, including spaces.\n"
            "3. Highlight the product's unique selling points to attract customers.\n\n"
            "### Output:\n"
            "Provide the promotional message as plain text without any additional formatting."
        )

        self._log(f"Sending prompt to GPT for promotional message generation: {prompt}", level=logging.DEBUG)
        response = self.ask(prompt)

        # NOTE(review): only a bare JSON string reply passes this check;
        # confirm that the endpoint really answers this prompt that way.
        if not isinstance(response, str):
            self._log("GPT response is not a valid string.", level=logging.ERROR)
            raise ValueError("GPT response is not a valid string.")

        result = response.strip()
        self._log(f"Generated promotional message: {result}", level=logging.DEBUG)
        return result

    def translate_options(self, original_data, product_name):
        """Translate option names through the GPT endpoint.

        Args:
            original_data: Mapping of option key to original option text.
            product_name: Product name given to the model as context.

        Returns:
            The parsed reply dict holding the translated option names.

        Raises:
            ValueError: If the reply is not a JSON object.
        """
        self._log(f"옵션 데이터를 번역 중: {original_data}", level=logging.DEBUG)

        # Normalize special characters in every option value before prompting.
        cleaned_data = {key: self.clean_special_chars(value) for key, value in original_data.items()}
        self._log(f"정리된 옵션 데이터: {cleaned_data}", level=logging.DEBUG)

        prompt = (
            f"다음은 옵션의 특징만 간단하게 남긴 후 번역해야 할 원래 옵션 이름들입니다: {json.dumps(cleaned_data, ensure_ascii=False)}.\n\n"
            f"원래 제품 이름은 '{product_name}'이며, 번역 시 이를 참조하여 제품의 특징을 선별해야 합니다.\n\n"
            "### 번역 규칙:\n"
            "1. 각 옵션은 고유한 특징을 유지하면서 공백포함 25자 이내로 간단하게 만드세요.\n"
            "2. 각 옵션의 고유한 특징은 크기, 무게, 재질, 사이즈, 용량, 전압, 전류 또는 제품 코드를 말한다.\n"
            "3. 번역된 모든 옵션 이름은 한글로 작성해야 하며, 중국어가 포함되지 않아야 합니다.\n"
            "4. 번역 후 모든옵션에 공통된 단어들은 제거해야한다.\n"
            "5. 번역 후 옵션 이름이 중복될 경우, 원래 옵션 이름에서 추가적인 고유 특징을 추출하여 구별되도록 하세요.\n"
            "6. 고객 서비스 문의, 가격 문의, 견적 또는 예약을 요청하는 옵션 이름은 제거하세요.\n"
            "7. 의미가 일치하는 경우 긴 단어를 짧은 단어로 대체하세요 (예: 'Display Panel'을 'Screen'으로 대체).\n"
            "8. 번역된 옵션 이름은 다음과 같은 JSON 형식으로 반환하세요:\n\n"
            "{\n"
            "  \"trans_option_1\": \"번역된 옵션 이름 1\",\n"
            "  \"trans_option_2\": \"번역된 옵션 이름 2\",\n"
            "  \"trans_option_3\": \"번역된 옵션 이름 3\",\n"
            "  \"trans_option_4\": \"번역된 옵션 이름 4\"\n"
            "}"
        )

        self._log("GPT 모델에 프롬프트를 전달하여 응답을 기다리는 중...", level=logging.DEBUG)
        gpt_response = self.ask(prompt)
        self._log(f"GPT 응답: {gpt_response}", level=logging.DEBUG)

        if not isinstance(gpt_response, dict):
            self._log("GPT 응답이 JSON 형식이 아닙니다.", level=logging.ERROR)
            raise ValueError("GPT 응답이 JSON 형식이 아닙니다.")

        self._log(f"번역된 데이터: {gpt_response}", level=logging.DEBUG)
        return gpt_response

    def clean_special_chars(self, text):
        """Replace mapped characters and drop punctuation that is not allowed.

        Characters found in ``replacements`` are substituted; alphanumerics,
        whitespace and characters in ``allowed_chars`` are kept; everything
        else is removed.

        Args:
            text: Input text.

        Returns:
            The cleaned text.
        """
        self._log(f"텍스트에서 특수 문자를 정리 중: {text}", level=logging.DEBUG)

        kept = []
        for char in text:
            if char in self.replacements:
                substitute = self.replacements[char]
                kept.append(substitute)
                self._log(f"문자 '{char}'를 대체 문자로 변경: {substitute}", level=logging.DEBUG)
            elif char in self.allowed_chars or char.isalnum() or char.isspace():
                kept.append(char)
            # Any other character is dropped.

        cleaned_text_str = ''.join(kept)
        self._log(f"정리된 텍스트: {cleaned_text_str}", level=logging.DEBUG)
        return cleaned_text_str

    def translate_product_name(self, client, original_name: str) -> str:
        """Translate a Chinese product name into Korean.

        Args:
            client: Object whose ``ask(prompt)`` returns the model reply,
                either already parsed (dict) or as a JSON string.
            original_name: Chinese product name to translate.

        Returns:
            The ``translation`` value of the reply, or ``original_name`` when
            the request fails or the reply has no usable translation.
        """
        prompt = (
            f"중국어 상품명 [{original_name}]를 한국어로 번역해줘. "
            f"응답은 아래 JSON 형식으로 해줘:\n"
            f'{{"translation": "번역결과"}}'
        )

        try:
            response = client.ask(prompt)
        except Exception as e:
            self._log(f"번역 중 오류 발생: {e}", level=logging.ERROR, exc_info=True)
            return original_name

        # ask() normally returns parsed JSON already; only decode when the
        # client handed back raw text.
        if isinstance(response, str):
            try:
                response = json.loads(response)
            except json.JSONDecodeError:
                self._log("응답이 유효한 JSON 형식이 아닙니다. 응답 내용: " + response, level=logging.ERROR)
                return original_name

        if not isinstance(response, dict):
            self._log(f"응답이 유효한 JSON 형식이 아닙니다. 응답 내용: {response}", level=logging.ERROR)
            return original_name

        translated_text = response.get("translation", original_name)
        if not isinstance(translated_text, str) or not translated_text.strip():
            return original_name
        return translated_text

if __name__ == "__main__":
    # Manual smoke test: translate one Chinese product name and print the result.

    class _ConsoleLogger:
        """Minimal console logger exposing the two methods GPTClient calls."""

        def __init__(self):
            logging.basicConfig(level=logging.DEBUG)
            self._logger = logging.getLogger("gpt_test")

        def log(self, message, level=logging.INFO, exc_info=False):
            self._logger.log(level, message, exc_info=exc_info)

        def error(self, message, exc_info=False):
            self._logger.error(message, exc_info=exc_info)

    # GPTClient logs on every failure path, so it needs a real logger object;
    # passing None would raise AttributeError on the first log call.
    client = GPTClient(logger=_ConsoleLogger())
    ori = "电动真空吸吊机不锈钢板铁板激光切割上料吸盘气动吊具起重电吸盘"

    result = client.translate_product_name(client, ori)

    print("GPT 응답:")
    print(result)