88 lines
3.1 KiB
Python
Executable File
88 lines
3.1 KiB
Python
Executable File
from fastapi import FastAPI, HTTPException
|
|
from pydantic import BaseModel
|
|
from typing import List
|
|
import os
|
|
from paddlenlp import Taskflow
|
|
import logging
|
|
|
|
# Create the FastAPI application object.
app = FastAPI()

# Build the PaddleNLP Taskflow pipelines once at import time:
# a zh -> ko translator and a word segmenter.
machine_translation = Taskflow(
    "text2text_generation",
    source_lang="zh",
    target_lang="ko",
    from_hf_hub=True,
)
word_segmentation = Taskflow("word_segmentation")

# Module-wide logger.
logger = logging.getLogger('default_logger')
|
|
|
|
# Pydantic models (data structures for the API request and response).
class OptionRequest(BaseModel):
    # Request body of POST /process_options.
    # options: the option names to process, one string per option.
    options: List[str]
|
|
|
|
class OptionResponse(BaseModel):
    # Response body of POST /process_options.
    # translated_options: the pipeline output, one entry per processed option.
    translated_options: List[str]
|
|
|
|
# Special characters that survive cleaning, and the substitutions applied
# before any other special character is stripped.
allowed_special_chars = "!$~()._-=+/"

special_char_replacements = {
    "*": "X",  # replace the '*' special character with 'X'
    # Full-width and square brackets become round brackets.
    "【": "(",
    "】": ")",
    "[": "(",
    "]": ")",
    # Commas become periods.
    ",": ".",
}
|
|
|
|
# API endpoint that cleans, segments, de-duplicates and translates option names.
@app.post("/process_options", response_model=OptionResponse)
async def process_options(option_request: OptionRequest):
    """Run the option-name pipeline and return the translated options.

    The pipeline has four stages:
      1. replace or strip special characters in every option name,
      2. segment each cleaned name into words,
      3. drop the words shared by every option,
      4. translate what is left of each option.

    Args:
        option_request: Request body carrying the raw option names.

    Returns:
        An ``OptionResponse`` whose ``translated_options`` holds one entry per
        input option, in input order. An empty input yields an empty list.
    """
    options = option_request.options

    # Nothing to process: short-circuit so the pipeline stages never run on
    # an empty batch (the common-word step cannot intersect zero word sets).
    if not options:
        return OptionResponse(translated_options=[])

    # NOTE(review): the helpers below are plain synchronous calls (nothing is
    # awaited), so they execute on the event loop for the duration of the
    # request -- confirm this is acceptable for the expected load.

    # 1. Special-character handling.
    cleaned_option_names = [
        clean_special_characters(option, allowed_special_chars, special_char_replacements)
        for option in options
    ]
    # Lazy %-style arguments: the message is only formatted when DEBUG is on.
    logger.debug("특수문자 처리 후 옵션명: %s", cleaned_option_names)

    # 2. Word segmentation of the option names.
    segmented_options = segment_option_names(cleaned_option_names)
    logger.debug("형태소 분석 결과: %s", segmented_options)

    # 3. Removal of the words common to all options.
    unique_options = remove_common_words(segmented_options)
    logger.debug("공통 단어 제거 후 고유한 옵션: %s", unique_options)

    # 4. Translation.
    translated_options = translate_options(unique_options)
    logger.debug("번역된 옵션: %s", translated_options)

    return OptionResponse(translated_options=translated_options)
|
|
|
|
# Special-character handling helper.
def clean_special_characters(option_name, allowed_special_chars, special_char_replacements):
    """Substitute mapped characters, then drop every other disallowed one.

    Args:
        option_name: The option name to clean.
        allowed_special_chars: Container of non-alphanumeric characters that
            may remain in the result.
        special_char_replacements: Mapping of substring -> replacement,
            applied one after another in the mapping's iteration order.

    Returns:
        The substituted text reduced to alphanumeric characters, characters
        found in ``allowed_special_chars``, and spaces.
    """
    text = option_name
    for old, new in special_char_replacements.items():
        text = text.replace(old, new)

    def is_kept(ch):
        # Same test, in the same order, for every character of the text.
        return ch.isalnum() or ch in allowed_special_chars or ch == ' '

    return ''.join(filter(is_kept, text))
|
|
|
|
# Word-segmentation helper.
def segment_option_names(option_names):
    """Run the module-level ``word_segmentation`` task on every option name.

    Args:
        option_names: Iterable of option-name strings.

    Returns:
        A list holding, for each input name in order, whatever
        ``word_segmentation`` returned for that name.
    """
    return [word_segmentation(name) for name in option_names]
|
|
|
|
# Common-word removal helper.
def remove_common_words(segmented_options):
    """Strip from every option the words that occur in all options.

    Args:
        segmented_options: Sequence of options, each a sequence of hashable
            words.

    Returns:
        A new list of lists: each option with the words shared by *every*
        option removed, original word order preserved. When fewer than two
        options are given there is nothing to compare against, so the options
        are returned unchanged (as fresh lists).
    """
    # Guard the degenerate cases: set.intersection() with no arguments raises
    # TypeError, and a lone option would otherwise lose all of its words
    # because every word is trivially "common" to the single set.
    if len(segmented_options) < 2:
        return [list(option) for option in segmented_options]

    first, *rest = segmented_options
    common_words = set(first).intersection(*rest)
    return [
        [word for word in option if word not in common_words]
        for option in segmented_options
    ]
|
|
|
|
# Translation helper.
def translate_options(unique_options):
    """Translate each option with the module-level ``machine_translation`` task.

    Args:
        unique_options: Iterable of options, each an iterable of word strings.

    Returns:
        A list holding, for each option in order, whatever
        ``machine_translation`` returned for the option's words joined by
        single spaces.
    """
    # NOTE(review): results are passed through untouched; confirm they are the
    # plain strings the response model declares.
    return [machine_translation(' '.join(words)) for words in unique_options]
|