AutoPercenty/ai/split.py

36 lines
1.8 KiB
Python

import re
from collections import Counter
def parse_and_extract(option_names):
    """Split option names into their distinguishing parts and shared words.

    Each name is divided into the text outside parentheses and the text
    inside them.  Words that occur in every option are treated as common
    and removed, separately for the outside text and for the parenthesized
    text; what remains is the unique part of each option.

    Args:
        option_names: Iterable of option name strings, e.g.
            ``["Cotton Shirt Red (size L)", "Cotton Shirt Blue (size M)"]``.

    Returns:
        A ``(unique_options, common_words)`` tuple.  ``unique_options`` has
        exactly one entry per input name, in input order, rebuilt as
        ``"<outside> (<inside>)"`` (the parenthesized part is omitted when
        nothing is left for it).  ``common_words`` lists the common
        parenthesized words followed by the common outside words, each group
        in the order the words first appear.

    Note:
        Common parenthesized words are determined only among the options
        that actually contain parenthesized text.  With a single option
        every word counts as common, so its unique part is empty.
    """

    def split_brackets(text):
        # Separate the text outside the parentheses from the text inside.
        inside = re.findall(r'\((.*?)\)', text)
        outside = re.sub(r'\(.*?\)', '', text).strip()
        return outside, ' '.join(inside).strip()

    def find_common_words(words_list):
        # Words present in every word list, ordered by first appearance.
        if not words_list:
            return []
        first, *rest = words_list
        # Membership is tested per option, so a word repeated inside one
        # option neither fakes nor hides being shared by all of them.
        shared = set(first).intersection(*rest)
        # A common word is necessarily in the first list; dict.fromkeys
        # de-duplicates it while keeping a deterministic order.
        return [word for word in dict.fromkeys(first) if word in shared]

    def filter_words(words, common_words):
        excluded = set(common_words)
        return ' '.join(word for word in words if word not in excluded)

    parsed_options = [split_brackets(option) for option in option_names]
    outside_words = [outside.split() for outside, _ in parsed_options]
    # Keep one (possibly empty) entry per option so the bracket words stay
    # aligned with the outside words when they are zipped back together.
    inside_words = [inside.split() for _, inside in parsed_options]

    common_outside_words = find_common_words(outside_words)
    # Only options that actually have bracket content take part in deciding
    # which bracket words are common.
    common_inside_words = find_common_words(
        [words for words in inside_words if words]
    )

    unique_outsides = [
        filter_words(words, common_outside_words) for words in outside_words
    ]
    unique_insides = [
        filter_words(words, common_inside_words) for words in inside_words
    ]

    # Append the parentheses only when some bracket content remains.
    unique_options = [
        outside + (' (' + inside + ')' if inside else '')
        for outside, inside in zip(unique_outsides, unique_insides)
    ]
    common_words = common_inside_words + common_outside_words
    return unique_options, common_words