# API_for_Paddle/main.py
#
# 88 lines
# 3.1 KiB
# Python
# Executable File

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
import os
from paddlenlp import Taskflow
import logging
# Initialize the FastAPI application.
app = FastAPI()
# Initialize the PaddleNLP Taskflow translator and word segmenter
# (morphological analyzer). Both are module-level objects that the helper
# functions below call directly.
machine_translation = Taskflow("text2text_generation", source_lang="zh", target_lang="ko", from_hf_hub=True)
word_segmentation = Taskflow("word_segmentation")
# Logger setup.
logger = logging.getLogger('default_logger')
# Pydantic models (data structures for the API request and response)
class OptionRequest(BaseModel):
    """Request body for the ``/process_options`` endpoint."""

    # Raw option names to be cleaned, segmented and translated.
    options: List[str]
class OptionResponse(BaseModel):
    """Response body returned by the ``/process_options`` endpoint."""

    # Translated option names produced by the processing pipeline.
    translated_options: List[str]
# Special characters that are kept as-is, and the substitutions applied to
# the ones that are not.
allowed_special_chars = "!$~()._-=+/"
# NOTE(review): two bracket keys in this table were empty strings; the
# original characters appear to have been lost (probably an encoding issue).
# Empty keys collapse into a single dict entry, and str.replace("", ")")
# inserts ")" between every character of the option name. They are assumed
# here to have been full-width / lenticular brackets -- confirm against the
# original file. Escapes are used so the keys cannot be lost the same way.
special_char_replacements = {
    "*": "X",  # replace the '*' special character with 'X'
    "\uff08": "(",  # full-width left parenthesis
    "\uff09": ")",  # full-width right parenthesis
    "\u3010": "(",  # left black lenticular bracket
    "\u3011": ")",  # right black lenticular bracket
    "[": "(",
    "]": ")",
    ",": ".",
}
# API endpoint that cleans, de-duplicates and translates option names
@app.post("/process_options", response_model=OptionResponse)
async def process_options(option_request: OptionRequest):
    """Run the option-name pipeline and return the translated names.

    The steps are: special-character cleanup, word segmentation, removal of
    the words shared by every option, and translation.

    Args:
        option_request: Request body holding the raw option names.

    Returns:
        An ``OptionResponse`` whose ``translated_options`` holds the result
        of translating each processed option. An empty request yields an
        empty list.
    """
    options = option_request.options
    # Nothing to process: answer right away. Without this guard the
    # common-word step would call set.intersection() with no arguments and
    # fail with a TypeError (surfacing as a 500 response).
    if not options:
        return OptionResponse(translated_options=[])

    # 1. Special-character handling
    cleaned_option_names = [
        clean_special_characters(option, allowed_special_chars, special_char_replacements)
        for option in options
    ]
    logger.debug("특수문자 처리 후 옵션명: %s", cleaned_option_names)

    # 2. Word segmentation of the option names
    segmented_options = segment_option_names(cleaned_option_names)
    logger.debug("형태소 분석 결과: %s", segmented_options)

    # 3. Remove the words common to all options
    unique_options = remove_common_words(segmented_options)
    logger.debug("공통 단어 제거 후 고유한 옵션: %s", unique_options)

    # 4. Translation
    translated_options = translate_options(unique_options)
    logger.debug("번역된 옵션: %s", translated_options)

    return OptionResponse(translated_options=translated_options)
# Special-character handling
def clean_special_characters(option_name: str, allowed_special_chars: str, special_char_replacements: dict) -> str:
    """Substitute mapped characters, then drop every other special character.

    Args:
        option_name: The raw option name.
        allowed_special_chars: Non-alphanumeric characters that are kept.
        special_char_replacements: Mapping of substring -> replacement,
            applied in the mapping's iteration order before filtering.

    Returns:
        The option name containing only alphanumeric characters, spaces and
        the allowed special characters.
    """
    for char, replacement in special_char_replacements.items():
        # str.replace("", x) inserts x between every character, so an empty
        # key can never be a meaningful substitution; ignore it.
        if not char:
            continue
        option_name = option_name.replace(char, replacement)
    return ''.join(
        char
        for char in option_name
        if char.isalnum() or char == ' ' or char in allowed_special_chars
    )
# Word segmentation (morphological analysis)
def segment_option_names(option_names):
    """Segment each option name with the module-level ``word_segmentation``.

    Args:
        option_names: Iterable of cleaned option-name strings.

    Returns:
        A list with one segmentation result per option name, in input order.
    """
    return [word_segmentation(name) for name in option_names]
# Common-word removal
def remove_common_words(segmented_options):
    """Drop the words that occur in every option.

    Args:
        segmented_options: List of options, each a list of word strings.

    Returns:
        A new list of word lists with the words shared by all options
        removed; word order and repeats within an option are preserved.
        With fewer than two options there is nothing to compare against,
        so the options are returned unchanged (as copies).
    """
    # An empty input would make set.intersection() raise TypeError, and a
    # single option would intersect only with itself and be wiped out.
    if len(segmented_options) < 2:
        return [list(option) for option in segmented_options]

    first, *rest = segmented_options
    common_words = set(first).intersection(*rest)
    return [
        [word for word in option if word not in common_words]
        for option in segmented_options
    ]
# Translation
def translate_options(unique_options):
    """Translate each option with the module-level ``machine_translation``.

    The words of an option are joined with single spaces before being passed
    to the translator.

    Args:
        unique_options: Iterable of word lists, one per option.

    Returns:
        A list holding the translator's result for each option, in input
        order.
    """
    return [machine_translation(' '.join(words)) for words in unique_options]