# AutoTao/tt.py

import json
import logging
import os
import re

import requests
from openai import OpenAI

class GPTClient:
    # def __init__(self, logger, api_key: str, model="gpt-4o-mini", temperature=0.0):
    def __init__(self, logger, api_key: str = "", model="gpt-4o-mini", temperature=0.0):
        """Create a client bound to one OpenAI chat model.

        The API key is never embedded in the source: it is taken from the
        ``api_key`` argument or, when that is empty, from the
        ``OPENAI_API_KEY`` environment variable.

        :param logger: logger object stored on the instance. Most methods call
            ``logger.log(message, level=..., exc_info=...)``;
            ``recommend_category_and_tags`` calls ``logger.debug`` / ``logger.error``.
        :param api_key: OpenAI API key; an empty value means "read OPENAI_API_KEY".
        :param model: chat-completion model name sent with every request.
        :param temperature: sampling temperature sent with every request.
        :raises ValueError: if no key is passed and OPENAI_API_KEY is not set.
        """
        # Secrets must come from the caller or the environment, never from a
        # literal committed to the repository.
        resolved_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        if not resolved_key:
            raise ValueError(
                "OpenAI API key is missing: pass api_key or set OPENAI_API_KEY."
            )

        self.logger = logger
        self.client = OpenAI(api_key=resolved_key)
        self.api_key = resolved_key
        self.model = model
        self.temperature = temperature

    def ask_old(self, prompt: str) -> str:
        """Send the prompt to the chat-completions API and return the reply text.

        The prompt is sent as a single ``user`` message using the instance's
        model and temperature.

        :param prompt: text of the user message.
        :return: the first choice's message content with surrounding
            whitespace stripped, or ``""`` if anything raises.
        """
        try:
            request = {
                "model": self.model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": self.temperature,
            }
            completion = self.client.chat.completions.create(**request)
            first_message = completion.choices[0].message
            answer = first_message.content.strip()
        except Exception as err:
            # Best effort: every failure is logged and mapped to an empty reply.
            self.logger.log(f"GPT 통신 오류: {err}", level=logging.ERROR, exc_info=True)
            answer = ""
        return answer


    def ask(self, prompt: str, *, url: str = "http://146.56.101.199:8900/chatgpt", timeout: float = 60.0) -> dict:
        """POST the prompt to the ChatGPT relay endpoint and return its decoded JSON body.

        :param prompt: text sent as the ``query`` field of the JSON payload.
        :param url: relay endpoint; defaults to the address this method has always used.
        :param timeout: seconds to wait for the relay. Without a timeout
            ``requests`` waits indefinitely, so a stalled server would hang the caller.
        :return: the decoded JSON body (callers expect a dict), or ``{}`` when
            the request fails, times out, returns an HTTP error status, or the
            body is not valid JSON.
        """
        try:
            headers = {"Content-Type": "application/json"}
            payload = {"query": prompt}
            response = requests.post(url, json=payload, headers=headers, timeout=timeout)
            response.raise_for_status()  # raise on HTTP error statuses

            # Use response.json() when the server labels the body as JSON
            if 'application/json' in response.headers.get('Content-Type', ''):
                return response.json()
            # Otherwise still try to parse the raw text as JSON
            return json.loads(response.text)

        except Exception as e:
            # Best effort: every failure is logged and mapped to an empty dict.
            self.logger.log(f'GPT 통신 오류: {e}', level=logging.ERROR, exc_info=True)
            return {}


    def ask1(self, prompt: str) -> dict:
        """Ask the chat-completions API and decode the reply as JSON.

        The reply may be wrapped in a Markdown code fence; a leading
        ```` ```json ```` or plain ```` ``` ```` fence (any letter case) and a
        trailing ```` ``` ```` are removed before decoding.

        :param prompt: text of the single ``user`` message.
        :return: the decoded JSON value, or ``{}`` when the reply is not valid
            JSON or the request raises.
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                temperature=self.temperature,
                messages=[{"role": "user", "content": prompt}],
            )
            # Text of the first choice
            content = response.choices[0].message.content.strip()
            self.logger.log(f'GPT 응답: {content}', level=logging.DEBUG)
            # Strip the optional code fence. The language tag is optional so
            # that a bare ``` fence no longer breaks JSON decoding.
            cleaned_content = re.sub(
                r"^```(?:json)?\s*|\s*```$", "", content, flags=re.IGNORECASE
            ).strip()

            return json.loads(cleaned_content)
        except json.JSONDecodeError as e:
            # `content` is always bound here: json.loads runs after it is assigned.
            self.logger.log(f'JSON 디코딩 실패: {e}. 원본 응답: {content}', level=logging.ERROR, exc_info=True)
            return {}
        except Exception as e:
            self.logger.log(f'GPT 통신 오류: {e}', level=logging.ERROR, exc_info=True)
            return {}

    def generate_product_name_next(self, words: list, original_name: str, top_titles: list, title_generator_prompt, max_length=40) -> str:
        """Build the product-name prompt, send it through ``ask`` and return the name.

        The final prompt is ``title_generator_prompt`` followed by an input-data
        section (original name, sold titles, cleaned word list) and the
        expected JSON output format.

        :param words: words extracted from sold titles; punctuation is removed from each.
        :param original_name: original product name.
        :param top_titles: titles of products that already sold.
        :param title_generator_prompt: instruction text placed before the input data.
        :param max_length: currently unused by this method.
        :return: the stripped ``product_name`` value of the reply, or ``""``
            when the reply is not a dict or the value cannot be read.
        """
        # Drop every character that is neither a word character nor whitespace
        punctuation = re.compile(r'[^\w\s]')
        clean_words = [punctuation.sub('', word) for word in words]

        # Input data followed by the required output format
        prompt_lines = [
            "### 입력 데이터:\n",
            f"- 원본 상품명: {original_name}\n",
            f"- 팔린 상품명 리스트: {top_titles}\n",
            f"- 팔린 상품명에서 추출한 단어 리스트: {clean_words}\n",
            "### 출력 형식:\n",
            "JSON 형식으로 결과를 반환해줘:\n",
            "{ \"product_name\": \"수정된 상품명\" }\n",
        ]
        option_prompt = "".join(prompt_lines)
        product_prompt = title_generator_prompt + option_prompt

        # Ask for the product name
        answer = self.ask(product_prompt)
        try:
            if isinstance(answer, dict):
                return answer.get("product_name", "").strip()

            self.logger.log("[GPTClient] product_response가 dict가 아닙니다. 빈 결과 반환.", level=logging.ERROR)
            return ""
        except Exception as err:
            self.logger.log(f'Error parsing product name from GPT response: {err}', level=logging.ERROR, exc_info=True)
            return ""


    def generate_product_name_next_old(self, words: list, original_name: str, top_titles: list, unique_first_two_words : list, max_length=30) -> str:
        """Generate a product name in two ``ask`` round trips.

        First the cleaned words are classified into large / medium / small
        keywords; then a product-name prompt is built. When there are no
        required words, no medium keywords or no small keywords, the prompt
        relies on the original name only.

        :param words: candidate keywords; punctuation is removed from each.
        :param original_name: original product name.
        :param top_titles: titles of products that already sold.
        :param unique_first_two_words: required keywords for the final name.
        :param max_length: lower length bound quoted in the prompt; the upper
            bound is 40% larger, rounded to a whole number.
        :return: the stripped ``product_name`` of the reply, or ``""`` when a
            reply is empty or contains no usable name.
        """
        def as_dict(response) -> dict:
            # ask() returns already-decoded JSON; only raw text still needs
            # parse_json_response(), which rejects non-string input.
            if isinstance(response, dict):
                return response
            if isinstance(response, str):
                return self.parse_json_response(response)
            self.logger.log(f"Unexpected GPT response type: {type(response).__name__}", level=logging.ERROR)
            return {}

        # Remove punctuation from the keywords
        clean_words = [re.sub(r'[^\w\s]', '', word) for word in words]

        # 1. Keyword classification prompt
        classify_prompt = (
            "너는 키워드 분류 전문가야. 주어진 키워드와 참고 상품명을 분석하여 키워드를 대형, 중형, 소형으로 분류해줘.\n\n"
            "### 분류 기준:\n"
            "1. **대형 키워드**: 상품의 범주나 카테고리를 나타내며, 범용적이고 일반적인 키워드.연도, 지역, 대명사, 숫자로만 이루어진 단어 등\n"
            "2. **중형 키워드**: 대형 키워드에 추가적인 속성이 붙은 키워드로, 범위가 좁아짐.\n"
            "3. **소형 키워드**: 상품의 구체적이고 특징적인 키워드로, 가장 세부적인 정보.\n\n"
            "### 입력 데이터:\n"
            f"- 키워드 리스트: {clean_words}\n"
            f"- 팔린 상품명 리스트: {top_titles}\n\n"
            "### 출력 형식:\n"
            "{\n"
            "  \"large_keywords\": [\"대형 키워드 리스트\"],\n"
            "  \"medium_keywords\": [\"중형 키워드 리스트\"],\n"
            "  \"small_keywords\": [\"소형 키워드 리스트\"]\n"
            "}\n"
        )

        # Ask for the keyword classification
        classify_response = self.ask(classify_prompt)
        if not classify_response:
            # No exception is active here, so exc_info is not passed.
            self.logger.log("GPT 응답이 비어 있습니다.", level=logging.ERROR)
            return ""

        keyword_data = as_dict(classify_response)
        # Large keywords are deliberately excluded from the name, so only the
        # medium and small groups are read.
        medium_keywords = keyword_data.get("medium_keywords", [])
        small_keywords = keyword_data.get("small_keywords", [])

        # Upper length bound as a whole number (the prompt used to show "42.0")
        upper_length = round(max_length + (max_length * 0.4))

        # 2. Product-name prompt
        if not unique_first_two_words or not medium_keywords or not small_keywords:
            # Nothing usable was found for this product: work from the original name only
            product_prompt = (
                "너는 상품명 편집 전문가야. 주어진 중국 원본 상품명을 단어단위로 구분하여 한국에서 잘 팔릴 수 있는 상품명으로 수정해야 해.\n\n"
                "### 작업 규칙:\n"
                "1. 연도, 지역, 과도한 홍보, 이벤트, 추상적인 표현등인 모두 지워줘. 지양해야한다는 얘기야.\n"
                "2. 상품명에는 고유 상품 코드가 포함되어야 함. 단! 숫자로만 이루어진 단어는 제외해야해\n"
                "3. 키워드 배치는 자연스럽고 검색에 용이하도록 작성.\n"
                "4. 괄호나 대괄호등이 있다면 해당내용들은 모두 버리고, 12345같은 의미없는 나열은 지양해야해.\n"
                "5. 중복을 피하고 중국어가 남아있으면 안되. \n\n"
                "### 입력 데이터:\n"
                f"- 원본 상품명: {original_name}\n"
                f"- 상품명 길이제한: 공백 포함 {max_length}자 ~ {upper_length}자 이내\n\n"
                "### 출력 형식:\n"
                "{ \"product_name\": \"수정된 상품명\" }\n"
            )
        else:
            product_prompt = (
                "너는 상품명 편집 전문가야. 주어진 중국 원본 상품명과 키워드를 활용하여 한국에서 잘 팔릴 수 있는 상품명으로 수정해야 해.\n\n"
                "### 작업 규칙:\n"
                "1. 반드시 대형키워드는 제외한 소형키워드, 중형키워드를 조합하여 작성해야 함.\n"
                "2. 상품명에는 고유 상품 코드가 포함되어야 함. 단! 숫자로만 이루어진 단어는 제외해야해\n"
                "3. 키워드 배치는 자연스럽고 검색에 용이하도록 작성.\n"
                "4. 괄호나 대괄호등이 있다면 해당내용들은 모두 버리고, 12345같은 의미없는 나열은 지양해야해.\n"
                "5. 반드시 필수 키워드 중 최소 2~3개를 넣어서 상품명을 작성.\n\n"
                "### 입력 데이터:\n"
                f"- 원본 상품명: {original_name}\n"
                f"- 필수 키워드: {unique_first_two_words}\n"
                f"- 중형 키워드: {medium_keywords}\n"
                f"- 소형 키워드: {small_keywords}\n\n"
                f"- 상품명 길이제한: 공백 포함 {max_length}자 ~ {upper_length}자 이내\n\n"
                "### 출력 형식:\n"
                "{ \"product_name\": \"수정된 상품명\" }\n"
            )

        # Ask for the product name
        product_data = as_dict(self.ask(product_prompt))
        product_name = product_data.get("product_name", "")
        if not isinstance(product_name, str):
            self.logger.log(f"Error parsing product name from GPT response: {product_name!r}", level=logging.ERROR)
            return ""
        return product_name.strip()


    def parse_json_response(self, classify_response: str) -> dict:
        """Extract and decode the JSON object embedded in a response.

        :param classify_response: raw response text. A dict (already-decoded
            JSON, which is what ``ask`` returns) is passed through unchanged.
        :return: the decoded object, or ``{}`` when the input is neither text
            nor a dict, contains no ``{...}`` block, or the block is not valid JSON.
        """
        try:
            self.logger.log(f"classify_response : {classify_response}", level=logging.DEBUG)

            # Already decoded: nothing to extract.
            if isinstance(classify_response, dict):
                return classify_response

            # re.search would raise TypeError on anything that is not text.
            if not isinstance(classify_response, str):
                self.logger.log("JSON 블록을 찾을 수 없습니다.", level=logging.ERROR)
                return {}

            # Greedy match from the first "{" to the last "}", across newlines
            match = re.search(r"\{.*\}", classify_response, re.DOTALL)
            if not match:
                self.logger.log("JSON 블록을 찾을 수 없습니다.", level=logging.ERROR)
                return {}

            result = json.loads(match.group(0))

            self.logger.log(f"result : {result}", level=logging.DEBUG)

            return result

        except json.JSONDecodeError as e:
            self.logger.log(f"파싱 오류: {e}. 응답 내용: {classify_response}", level=logging.ERROR, exc_info=True)
            return {}

    def recommend_category_and_tags(self, product_names: list, categories: list) -> str:
        """
        Recommend the three best-matching categories and some search tags for each product name.

        :param product_names: product names to process
        :param categories: candidate category names
        :return: JSON text: a list with one entry per product name
                 (``product_name``, ``recommended_categories``, ``recommended_tags``),
                 or an object with a single ``error`` key if anything raises
        """
        # NOTE(review): this method calls logger.debug()/logger.error(), while the
        # other methods call logger.log(msg, level=...) - confirm the logger
        # passed to __init__ supports both.
        try:
            recommendations = []
            for product_name in product_names:
                # Rank the categories by similarity, best first. The sort is
                # stable, so equally scored categories keep their input order.
                ranked = sorted(
                    categories,
                    key=lambda category: self._calculate_similarity(product_name, category),
                    reverse=True,
                )
                entry = {
                    "product_name": product_name,
                    "recommended_categories": ranked[:3],
                    "recommended_tags": self._generate_tags(product_name, categories),
                }
                recommendations.append(entry)

            # Serialise without escaping non-ASCII text
            payload = json.dumps(recommendations, ensure_ascii=False, indent=4)
            self.logger.debug("추천 결과 생성 완료.")
            return payload

        except Exception as err:
            self.logger.error(f"카테고리와 태그 추천 중 오류 발생: {err}", exc_info=True)
            return json.dumps({"error": str(err)}, ensure_ascii=False)

    def _calculate_similarity(self, product_name: str, category: str) -> float:
        """
        Score how similar a product name and a category are by word overlap.

        Both strings are split on whitespace; the score is the number of shared
        words divided by the size of the larger word set.

        :param product_name: product name
        :param category: category name
        :return: score between 0.0 and 1.0; 0.0 when neither string contains a word
        """
        product_words = set(product_name.split())
        category_words = set(category.split())
        larger_size = max(len(product_words), len(category_words))
        if larger_size == 0:
            # Both inputs are empty or whitespace: avoid dividing by zero.
            return 0.0
        return len(product_words & category_words) / larger_size

    def _generate_tags(self, product_name: str, categories: list) -> list:
        """
        Build up to five search tags from category words that the product name lacks.

        The tags are the first five distinct category words, in order of first
        appearance, that do not occur in the product name. The result is
        therefore the same on every run, which slicing a set did not guarantee.

        :param product_name: product name
        :param categories: all category names
        :return: list of at most five distinct tag words
        """
        product_words = set(product_name.split())
        # dict.fromkeys removes duplicates while keeping first-appearance order.
        category_words = dict.fromkeys(" ".join(categories).split())
        unique_words = [word for word in category_words if word not in product_words]
        return unique_words[:5]


# Manual smoke test
if __name__ == "__main__":
    # Console logging setup
    # NOTE(review): GPTClient calls logger.log(message, level=...), but
    # logging.Logger.log expects (level, message). With this stdlib logger an
    # error path inside the client would raise TypeError - confirm which
    # logger type the class is meant to receive.
    logger = logging.getLogger("gpt_test")
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(handler)

    # The key is read from the environment so that no secret lives in the source.
    API_KEY = os.environ.get("OPENAI_API_KEY", "")
    if not API_KEY:
        logger.error("Set the OPENAI_API_KEY environment variable before running this test.")
        raise SystemExit(1)

    try:
        # Create the client
        client = GPTClient(logger=logger, api_key=API_KEY, model="gpt-4o-mini", temperature=0.0)
    except ValueError as e:
        logger.error(e)
        # SystemExit instead of exit(): exit() is only injected by the site module.
        raise SystemExit(1)

    # Simple test prompt; a JSON reply is expected.
    test_prompt = "```json\n{\"product_name\": \"테스트 상품명\"}\n```"

    result = client.ask(test_prompt)
    print("Test result:")
    print(result)