36 lines
1.8 KiB
Python
36 lines
1.8 KiB
Python
import re
|
|
from collections import Counter
|
|
|
|
def parse_and_extract(option_names):
    """Strip the words shared by every option name, keeping only what differs.

    Each option name is split into the text outside parentheses and the text
    inside them. Words shared by all options are removed from the outside
    part; words shared by all options *that have parenthesised text* are
    removed from the inside part.

    Args:
        option_names: Iterable of option name strings, e.g.
            ``["Red Shirt (Size L)", "Blue Shirt (Size M)"]``.

    Returns:
        A ``(unique_options, common_words)`` tuple. ``unique_options`` has
        exactly one entry per input option, in input order, formatted as
        ``"<outside> (<inside>)"`` (the parenthesised part is omitted when
        nothing unique remains inside). ``common_words`` lists the common
        inside words followed by the common outside words, each group in
        the order the words appear in the first relevant option.
    """
    bracket_re = re.compile(r'\((.*?)\)')

    def split_brackets(text):
        # Separate the content outside the parentheses from the content inside.
        inside = ' '.join(bracket_re.findall(text)).strip()
        outside = bracket_re.sub('', text).strip()
        return outside, inside

    def find_common_words(words_list):
        # Return the words present in every word list, ordered as in the first.
        if not words_list:
            return []
        # Count each word once per option so repeats inside a single option
        # can neither fake nor hide commonality.
        presence = Counter(
            word for words in words_list for word in set(words)
        )
        total = len(words_list)
        common = []
        # A common word is necessarily in the first option, so walking it
        # yields a deterministic order (no dependence on set/hash ordering).
        for word in words_list[0]:
            if presence[word] == total and word not in common:
                common.append(word)
        return common

    def filter_words(words, common_words):
        excluded = set(common_words)
        return ' '.join(word for word in words if word not in excluded)

    parsed_options = [split_brackets(option) for option in option_names]
    outside_words = [outside.split() for outside, _ in parsed_options]
    # Keep one entry per option (empty list when there are no parentheses) so
    # the inside parts stay aligned with the outside parts.
    inside_words = [inside.split() for _, inside in parsed_options]

    common_outside_words = find_common_words(outside_words)
    # Only options that actually have parenthesised content take part in
    # deciding which inside words are common.
    common_inside_words = find_common_words(
        [words for words in inside_words if words]
    )

    unique_options = []
    for out_words, in_words in zip(outside_words, inside_words):
        outside = filter_words(out_words, common_outside_words)
        inside = filter_words(in_words, common_inside_words)
        # Add the parentheses only when unique inside content remains.
        unique_options.append(outside + (' (' + inside + ')' if inside else ''))

    common_words = common_inside_words + common_outside_words
    return unique_options, common_words
|