diff --git a/__pycache__/main.cpython-310.pyc b/__pycache__/main.cpython-310.pyc
new file mode 100755
index 0000000..7c691ad
Binary files /dev/null and b/__pycache__/main.cpython-310.pyc differ
diff --git a/main.py b/main.py
new file mode 100755
index 0000000..e59987c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,129 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List
+import os
+from paddlenlp import Taskflow
+import logging
+
+# Initialize the FastAPI application.
+app = FastAPI()
+
+# Load the PaddleNLP Taskflow translator and word segmenter once at import
+# time; the request handlers below reuse these module-level instances.
+machine_translation = Taskflow("text2text_generation", source_lang="zh", target_lang="ko", from_hf_hub=True)
+word_segmentation = Taskflow("word_segmentation")
+
+# Logger setup.
+logger = logging.getLogger('default_logger')
+
+
+# Pydantic models describing the API request and response payloads.
+class OptionRequest(BaseModel):
+    """Request body carrying the raw option names."""
+
+    options: List[str]
+
+
+class OptionResponse(BaseModel):
+    """Response body carrying one translated entry per option."""
+
+    translated_options: List[str]
+
+
+# Special characters that are allowed to remain, and the characters that are
+# substituted before filtering.
+allowed_special_chars = "!$~()._-=+/"
+special_char_replacements = {
+    "*": "X",  # replace '*' with 'X'
+    "【": "(",
+    "】": ")",
+    "[": "(",
+    "]": ")",
+    ",": ".",
+}
+
+
+@app.post("/process_options", response_model=OptionResponse)
+async def process_options(option_request: OptionRequest):
+    """Clean, segment, de-duplicate and translate the submitted option names.
+
+    NOTE(review): the Taskflow calls below are synchronous, so they occupy the
+    event loop for the duration of a request; confirm this is acceptable.
+    """
+    options = option_request.options
+
+    # 1. Special-character handling.
+    cleaned_option_names = [
+        clean_special_characters(option, allowed_special_chars, special_char_replacements)
+        for option in options
+    ]
+    logger.debug("특수문자 처리 후 옵션명: %s", cleaned_option_names)
+
+    # 2. Word segmentation of each option name.
+    segmented_options = segment_option_names(cleaned_option_names)
+    logger.debug("형태소 분석 결과: %s", segmented_options)
+
+    # 3. Remove the words shared by every option.
+    unique_options = remove_common_words(segmented_options)
+    logger.debug("공통 단어 제거 후 고유한 옵션: %s", unique_options)
+
+    # 4. Translation.
+    translated_options = translate_options(unique_options)
+    logger.debug("번역된 옵션: %s", translated_options)
+
+    return OptionResponse(translated_options=translated_options)
+
+
+def clean_special_characters(option_name, allowed_special_chars, special_char_replacements):
+    """Substitute mapped characters, then drop every disallowed character.
+
+    Args:
+        option_name: Raw option name.
+        allowed_special_chars: Non-alphanumeric characters that may remain.
+        special_char_replacements: Mapping of text to its replacement, applied
+            in iteration order before filtering.
+
+    Returns:
+        The name reduced to alphanumeric characters, spaces and the allowed
+        special characters.
+    """
+    for char, replacement in special_char_replacements.items():
+        option_name = option_name.replace(char, replacement)
+    # str.isalnum() accepts letters and digits of any script, so CJK text
+    # passes through this filter unchanged.
+    return ''.join(
+        char for char in option_name
+        if char.isalnum() or char == ' ' or char in allowed_special_chars
+    )
+
+
+def segment_option_names(option_names):
+    """Run the word segmenter on each option name, preserving input order."""
+    return [word_segmentation(option) for option in option_names]
+
+
+def remove_common_words(segmented_options):
+    """Drop the words that occur in every option, keeping word order.
+
+    Fewer than two options are returned unchanged: set.intersection() needs
+    at least one set, and a lone option would intersect with itself and lose
+    every word. An option consisting only of shared words keeps its original
+    words instead of collapsing to an empty list.
+    """
+    if len(segmented_options) < 2:
+        return [list(option) for option in segmented_options]
+
+    common_words = set.intersection(*(set(option) for option in segmented_options))
+    return [
+        [word for word in option if word not in common_words] or list(option)
+        for option in segmented_options
+    ]
+
+
+def translate_options(unique_options):
+    """Translate each option after joining its words with single spaces.
+
+    NOTE(review): OptionResponse declares List[str]; confirm that the
+    translator returns a plain string for a single input.
+    """
+    return [machine_translation(' '.join(option)) for option in unique_options]
diff --git a/requirements.txt b/requirements.txt
new file mode 100755
index 0000000..adca66a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+paddlepaddle
+paddleocr
+paddlenlp
+uvicorn
+gunicorn
+fastapi
diff --git a/server.py b/server.py
deleted file mode 100755
index 8949525..0000000
--- a/server.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# server.py
-from fastapi import FastAPI, Request
-from paddlenlp import Taskflow
-
-app = FastAPI()
-
-# 모델 로드 (ChatGLM 등 대형 모델)
-machine_translation = Taskflow("text2text_generation", source_lang="zh", target_lang="ko")
-
-@app.post("/translate")
-async def translate(request: Request):
-    body = await request.json()
-    text = body["text"]
-    result = machine_translation(text)
-    return {"translated_text": result}