# TRNote/main.py

import flet as ft
import configparser
import os
import numpy as np
import wave
import tempfile
from modules.audio_source import AudioSource
from modules.speech_recognition import SpeechRecognizer
from modules.conversation_analyzer import ConversationAnalyzer
from modules.database_manager import DatabaseManager
from modules.gui_components import (
    AudioSourceSelector,
    StatusIndicator,
    ConversationView,
    PreviousConversationsList,
    TrainInfoPanel
)

def main(page: ft.Page):
    """Build the speech-recognition window on ``page`` and wire up its handlers.

    Loads ``config.ini`` (creating it with defaults when it is missing),
    applies the theme and window settings, and instantiates the audio,
    recognition, analysis and database objects together with the GUI
    components that the remainder of this function lays out and connects.

    Args:
        page: The Flet page supplied by ``ft.app``.
    """
    # Seed the parser with the defaults first. An existing config.ini then only
    # overrides what it actually contains, so the [app] and [audio] sections
    # are always present for the config.set(...) calls made when saving.
    config = configparser.ConfigParser()
    config.read_dict({
        "api": {
            "openai_api_key": "your_openai_api_key_here",
            "huggingface_api_key": "your_huggingface_api_key_here",
        },
        "audio": {
            "sample_rate": "16000",
            "silence_threshold": "0.02",
            "silence_duration": "60",
            "buffer_duration": "3",
        },
        "app": {
            "theme": "light",
        },
        "model": {
            "provider": "huggingface",
            "name": "facebook/wav2vec2-base-960h",
        },
    })

    if os.path.exists("config.ini"):
        config.read("config.ini")
    else:
        # First run: persist the defaults so there is a file to edit.
        with open("config.ini", "w") as configfile:
            config.write(configfile)

    # Apply the configured theme; any value other than "light" selects dark.
    theme = config.get("app", "theme", fallback="light")
    page.theme_mode = ft.ThemeMode.LIGHT if theme.lower() == "light" else ft.ThemeMode.DARK

    page.title = "철도 음성인식 시스템"
    page.window_width = 1200
    page.window_height = 800
    page.padding = 10

    # Core service objects shared by all handlers below.
    audio_source = AudioSource()
    speech_recognizer = SpeechRecognizer()
    conversation_analyzer = ConversationAnalyzer()
    db_manager = DatabaseManager()

    # Test-mode flag; rebound by toggle_test_mode via ``nonlocal``.
    is_test_mode = False

    # GUI components.
    audio_source_selector = AudioSourceSelector(audio_source)
    status_indicator = StatusIndicator()
    conversation_view = ConversationView()
    previous_conversations_list = PreviousConversationsList(conversation_view)
    train_info_panel = TrainInfoPanel(db_manager)

    # Text line that shows interim/real-time recognition results.
    realtime_indicator = ft.Text("", italic=True, color=ft.colors.GREY_600)

    # 파일 선택 콜백 함수 정의
    def on_file_picked(e):
        """Handle the file picker result and run recognition on the chosen file.

        Args:
            e: File picker result event. ``e.files`` is falsy when nothing was
                selected; otherwise its first entry is the chosen file.
        """
        if not e.files:
            return

        file_path = e.files[0].path
        if not file_path:
            # Flet documents ``path`` as unavailable (None) when running in a
            # browser; without a local path there is nothing to process.
            test_status.value = "선택된 파일의 경로를 확인할 수 없습니다."
            page.update()
            return

        test_status.value = f"선택된 파일: {os.path.basename(file_path)}"
        page.update()

        # Run recognition on the selected file.
        process_audio_file(file_path)

    # 테스트 모드 컨트롤
    def _on_test_switch_change(event):
        # Forward the switch's new on/off value to the test-mode handler.
        return toggle_test_mode(event.control.value)

    def _request_audio_file(_event):
        # Open the file picker for a single MP3 or WAV file.
        return file_picker.pick_files(
            allow_multiple=False,
            allowed_extensions=["mp3", "wav"],
        )

    def _framed(content, **container_options):
        # Wrap a component in the thin grey rounded border used by every panel.
        return ft.Container(
            content=content,
            border=ft.border.all(1, ft.colors.GREY_400),
            border_radius=5,
            **container_options,
        )

    # Test-mode widgets: the switch, the file picker (registered as a page
    # overlay) and the button that opens it, which starts out disabled.
    test_mode_switch = ft.Switch(
        label="테스트 모드",
        value=False,
        on_change=_on_test_switch_change,
    )
    file_picker = ft.FilePicker(on_result=on_file_picked)
    page.overlay.append(file_picker)

    test_file_button = ft.ElevatedButton(
        "MP3 파일 선택",
        icon=ft.icons.UPLOAD_FILE,
        on_click=_request_audio_file,
        disabled=True,
    )

    # Status line for test mode.
    test_status = ft.Text("", italic=True, color=ft.colors.ORANGE)

    # Left column: previous conversations above the current conversation.
    history_box = _framed(previous_conversations_list, height=320, padding=5)
    conversation_box = _framed(conversation_view, height=480, padding=5, expand=True)
    left_panel = ft.Column([history_box, conversation_box], expand=True)

    # Right side: train information panel.
    right_panel = _framed(train_info_panel, padding=10, expand=True)

    # Top row: source selector, status, start/stop buttons and settings button.
    start_button = ft.ElevatedButton("시작", on_click=lambda _: start_monitoring())
    stop_button = ft.ElevatedButton("중지", on_click=lambda _: stop_monitoring())
    settings_button = ft.IconButton(
        icon=ft.icons.SETTINGS,
        tooltip="설정",
        on_click=lambda _: show_settings_dialog(),
    )
    top_controls = ft.Row([
        audio_source_selector,
        status_indicator,
        start_button,
        stop_button,
        settings_button,
    ])

    # Second row: test-mode controls.
    test_controls = ft.Row(
        [test_mode_switch, test_file_button, test_status],
        visible=True,
    )

    # Page layout: control rows, the real-time indicator, then both panels.
    panels_row = ft.Row([left_panel, right_panel], expand=True)
    main_layout = ft.Column(
        [top_controls, test_controls, realtime_indicator, panels_row],
        expand=True,
    )

    page.add(main_layout)

    # 테스트 모드 토글 함수
    def toggle_test_mode(value):
        nonlocal is_test_mode
        is_test_mode = value
        test_file_button.disabled = not value

        if value:
            test_status.value = "테스트 모드: 활성화됨"
            audio_source_selector.disabled = True
        else:
            test_status.value = "테스트 모드: 비활성화됨"
            audio_source_selector.disabled = False

        page.update()

    # 오디오 파일 처리
    def process_audio_file(file_path):
        """Transcribe an audio file and push the result through the UI pipeline.

        The file is first handed to the recognizer as-is. If that returns
        ``None`` for an ``.mp3`` file, the audio is decoded with librosa,
        written to a temporary 16 kHz mono 16-bit WAV file and recognized
        again. Recognized text is analyzed, shown in the conversation views and
        used to look up train/fault information. Failures are reported through
        the status widgets and the console instead of being raised.

        Args:
            file_path: Path of the MP3 or WAV file to process.
        """

        def recognize_mp3_via_wav(mp3_path):
            """Decode ``mp3_path`` to a temporary WAV file and recognize that."""
            # Imported lazily so librosa is only required for this fallback.
            import librosa

            samples, _ = librosa.load(mp3_path, sr=16000, mono=True)

            # Clip before scaling: samples outside [-1, 1] would otherwise
            # overflow when cast to int16 and come out as wrapped values.
            pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

            fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)  # only the path is needed; wave reopens the file
            try:
                with wave.open(temp_wav_path, "wb") as wf:
                    wf.setnchannels(1)
                    wf.setsampwidth(2)  # 16-bit samples
                    wf.setframerate(16000)
                    wf.writeframes(pcm.tobytes())

                return speech_recognizer.recognize_file(temp_wav_path)
            finally:
                # Always remove the temporary file; a failed cleanup is only
                # logged so it cannot mask the recognition result.
                try:
                    os.unlink(temp_wav_path)
                except FileNotFoundError:
                    pass
                except OSError as cleanup_error:
                    print(f"임시 파일 삭제 오류: {cleanup_error}")

        try:
            status_indicator.set_status("파일 처리 중")
            status_indicator.set_detecting(True)
            page.update()

            # First attempt: let the recognizer read the file directly.
            text = speech_recognizer.recognize_file(file_path)

            # Fallback for MP3 files the recognizer could not handle.
            if text is None and file_path.lower().endswith('.mp3'):
                try:
                    text = recognize_mp3_via_wav(file_path)
                except Exception as mp3_error:
                    # Best effort: a failed fallback is reported below as
                    # "no text" rather than as a hard error.
                    print(f"MP3 처리 오류: {mp3_error}")

            if text:
                realtime_indicator.value = f"파일 인식 결과: {text[:100]}..."

                speakers = conversation_analyzer.identify_speakers(text)
                context = conversation_analyzer.analyze_conversation(text, speakers)

                conversation_view.update_conversation(text, speakers)
                previous_conversations_list.add_conversation(context)

                # Look up related information in the database.
                train_info = db_manager.get_train_info(context)
                fault_info = db_manager.get_fault_info(context)

                train_info_panel.update_info(train_info, fault_info)
            else:
                realtime_indicator.value = "파일 인식 실패: 텍스트를 추출할 수 없습니다."

            status_indicator.set_detecting(False)
            status_indicator.set_status("대기 중")
            page.update()

        except Exception as e:
            print(f"오디오 파일 처리 오류: {e}")
            realtime_indicator.value = f"오류: {str(e)}"
            status_indicator.set_detecting(False)
            status_indicator.set_status("오류")
            page.update()

    # 음원 감시 관련 함수
    def start_monitoring():
        """Start live recording, or prompt for a file when test mode is on.

        In test mode this only opens the file picker. Otherwise the selected
        audio source is recorded with ``on_audio_data`` and
        ``on_realtime_audio_data`` as callbacks. Errors are shown in a snack
        bar and printed; they are not raised.
        """

        def notify(message):
            # Show a transient message at the bottom of the page.
            page.snack_bar = ft.SnackBar(ft.Text(message))
            page.snack_bar.open = True
            page.update()

        if is_test_mode:
            # In test mode, ask for a file instead of recording.
            file_picker.pick_files(
                allow_multiple=False,
                allowed_extensions=["mp3", "wav"]
            )
            return

        try:
            selected_source = audio_source_selector.get_selected_source()
            if not selected_source:
                notify("음원 소스를 선택해주세요")
                return

            status_indicator.set_status("녹음 중")
            # Pass both the completed-segment and the real-time callbacks.
            audio_source.start_recording(selected_source, on_audio_data, on_realtime_audio_data)
            page.update()
        except Exception as e:
            print(f"녹음 시작 오류: {e}")
            # Do not leave the "recording" status on screen after a failure.
            status_indicator.set_status("오류")
            notify(f"녹음 시작 오류: {e}")

    def stop_monitoring():
        """Stop recording and real-time recognition and reset the status widgets.

        Errors are printed and not raised.
        """
        try:
            audio_source.stop_recording()

            # Stop real-time recognition when the recognizer provides it.
            if hasattr(speech_recognizer, 'stop_realtime_recognition'):
                speech_recognizer.stop_realtime_recognition()

            # The real-time callback turns the detecting indicator on and never
            # turns it off, so clear it here together with the status text.
            status_indicator.set_detecting(False)
            status_indicator.set_status("대기 중")
            realtime_indicator.value = ""
            page.update()
        except Exception as e:
            print(f"녹음 중지 오류: {e}")

    def on_audio_data(audio_data):
        """Callback for completed audio data: transcribe, analyze and display it.

        Args:
            audio_data: Sized audio buffer; empty buffers are ignored.
        """
        try:
            if len(audio_data) == 0:
                return

            status_indicator.set_detecting(True)
            page.update()

            transcript = speech_recognizer.recognize(audio_data)
            if transcript:
                participants = conversation_analyzer.identify_speakers(transcript)
                summary = conversation_analyzer.analyze_conversation(transcript, participants)

                conversation_view.update_conversation(transcript, participants)
                previous_conversations_list.add_conversation(summary)

                # Look up related train and fault records, then show them.
                train_info_panel.update_info(
                    db_manager.get_train_info(summary),
                    db_manager.get_fault_info(summary),
                )

            status_indicator.set_detecting(False)
            page.update()
        except Exception as error:
            print(f"오디오 데이터 처리 오류: {error}")
            status_indicator.set_detecting(False)
            page.update()

    def on_realtime_audio_data(audio_data):
        """Callback for real-time audio data; shows interim recognition text.

        Args:
            audio_data: Sized audio buffer; empty buffers are ignored.
        """

        def show_interim_text(text):
            # Result callback handed to the recognizer; empty text is ignored.
            if not text:
                return
            realtime_indicator.value = f"실시간 인식 중: {text[:100]}..."
            page.update()

        try:
            if len(audio_data) == 0:
                return

            status_indicator.set_detecting(True)

            supports_streaming = (
                hasattr(speech_recognizer, 'add_audio_data')
                and hasattr(speech_recognizer, 'start_realtime_recognition')
            )
            if not supports_streaming:
                # No real-time API available: recognize this buffer directly.
                detected = speech_recognizer.recognize(audio_data)
                if detected:
                    realtime_indicator.value = f"감지: {detected[:100]}..."
            else:
                # Queue the audio, then start processing if it is not running.
                speech_recognizer.add_audio_data(audio_data)
                if hasattr(speech_recognizer, 'is_processing') and not speech_recognizer.is_processing:
                    speech_recognizer.start_realtime_recognition(show_interim_text)

            page.update()
        except Exception as error:
            print(f"실시간 오디오 처리 오류: {error}")

    def show_settings_dialog():
        """Open the settings dialog for API keys, model, audio values and theme.

        The dialog is pre-filled from the recognizer's current attributes and
        from ``config``. "저장" applies the values to the recognizer and audio
        source, writes ``config.ini`` and applies the theme; "취소" closes the
        dialog without saving. Errors are printed (and, while saving, shown in
        a snack bar) instead of being raised.
        """
        try:
            def audio_setting(key, default, low, high):
                """Read a numeric [audio] option, clamped to the slider range."""
                try:
                    value = float(config.get("audio", key, fallback=str(default)))
                except ValueError:
                    # Unparseable value in config.ini: fall back to the default.
                    value = default
                # A slider value outside [min, max] is invalid, so clamp it.
                return min(max(value, low), high)

            def close_dialog(e=None):
                # Changing ``open`` only takes effect once the page is updated.
                dialog.open = False
                page.update()

            # OpenAI API key field.
            openai_api_key_field = ft.TextField(
                label="OpenAI API 키",
                value=speech_recognizer.api_key,
                password=True,
                width=400
            )

            # Hugging Face API key field.
            hf_api_key_field = ft.TextField(
                label="Hugging Face API 키",
                value=speech_recognizer.hf_api_key,
                password=True,
                width=400
            )

            # Model provider selection.
            model_provider_radio = ft.RadioGroup(
                content=ft.Column([
                    ft.Radio(value="huggingface", label="Hugging Face (한국어 음성인식 권장)"),
                    ft.Radio(value="openai", label="OpenAI Whisper (백업)"),
                    ft.Radio(value="vosk", label="VOSK 오프라인 (완전 오프라인)"),
                ]),
                value=speech_recognizer.model_provider
            )

            # Speech recognition model dropdown.
            model_options = [
                ft.dropdown.Option("vosk-model-small-kr", "VOSK 모델 - 완전 오프라인 한국어 소형 모델"),
                ft.dropdown.Option("kresnik/wav2vec2-large-xlsr-korean", "한국어 특화 음성인식 모델"),
                ft.dropdown.Option("openai/whisper-small", "openai 한국어 특화 Whisper 소형 모델"),
                ft.dropdown.Option("openai/whisper-medium", "openai 한국어 특화 Whisper 중형 모델"),
                ft.dropdown.Option("openai/whisper-large-v3", "openai 한국어 특화 Whisper 대형 모델"),
                ft.dropdown.Option("facebook/wav2vec2-base-960h", "영어 음성인식 기본 모델")
            ]

            model_dropdown = ft.Dropdown(
                label="음성인식 모델 선택",
                width=400,
                options=model_options,
                value=speech_recognizer.model_name,
            )

            # Audio settings. Each label is formatted the same way initially
            # and after every slider change.
            initial_threshold = audio_setting("silence_threshold", 0.02, 0.01, 0.1)
            initial_duration = audio_setting("silence_duration", 60.0, 10, 120)
            initial_buffer = audio_setting("buffer_duration", 3.0, 1, 10)

            silence_threshold_text = ft.Text(f"소리 감지 임계값: {initial_threshold:.2f}")
            silence_duration_text = ft.Text(f"대화 구분 시간(초): {initial_duration:.0f}")
            buffer_duration_text = ft.Text(f"버퍼 길이(초): {initial_buffer:.0f}")

            def update_slider_label(e):
                silence_threshold_text.value = f"소리 감지 임계값: {e.control.value:.2f}"
                page.update()

            def update_duration_label(e):
                silence_duration_text.value = f"대화 구분 시간(초): {e.control.value:.0f}"
                page.update()

            def update_buffer_label(e):
                buffer_duration_text.value = f"버퍼 길이(초): {e.control.value:.0f}"
                page.update()

            silence_threshold_slider = ft.Slider(
                min=0.01,
                max=0.1,
                divisions=9,
                value=initial_threshold,
                label="{value}",
                on_change=update_slider_label
            )

            silence_duration_slider = ft.Slider(
                min=10,
                max=120,
                divisions=11,
                value=initial_duration,
                label="{value}",
                on_change=update_duration_label
            )

            buffer_duration_slider = ft.Slider(
                min=1,
                max=10,
                divisions=9,
                value=initial_buffer,
                label="{value}",
                on_change=update_buffer_label
            )

            # Theme selection. Read the current value from config rather than
            # the value captured at startup, so a previously saved change is
            # shown. Anything other than "light" is treated as dark, matching
            # how the theme is applied.
            current_theme = config.get("app", "theme", fallback="light").lower()
            theme_radio = ft.RadioGroup(
                content=ft.Column([
                    ft.Radio(value="light", label="밝은 테마"),
                    ft.Radio(value="dark", label="어두운 테마"),
                ]),
                value="light" if current_theme == "light" else "dark"
            )

            def save_settings(e):
                """Apply the dialog values, persist them and close the dialog."""
                try:
                    # API keys are only applied when a value was entered.
                    if openai_api_key_field.value:
                        speech_recognizer.set_api_key(openai_api_key_field.value)

                    if hf_api_key_field.value:
                        speech_recognizer.set_huggingface_api_key(hf_api_key_field.value)

                    # Model settings.
                    if model_provider_radio.value and model_dropdown.value:
                        speech_recognizer.set_model(
                            model_provider_radio.value,
                            model_dropdown.value
                        )

                    # Audio settings.
                    threshold = silence_threshold_slider.value
                    duration = silence_duration_slider.value
                    buffer_length = buffer_duration_slider.value

                    if hasattr(audio_source, 'update_settings'):
                        audio_source.update_settings(
                            silence_threshold=threshold,
                            silence_duration=duration,
                            buffer_duration=buffer_length
                        )
                    else:
                        # No update method: set the attributes directly.
                        audio_source.silence_threshold = threshold
                        audio_source.silence_duration = duration
                        audio_source.buffer_duration = buffer_length

                    # NOTE(review): the recognizer/audio setters above may
                    # persist their own values to config.ini - confirm. Reading
                    # the file again keeps whatever is on disk from being
                    # overwritten by the copy loaded at startup.
                    config.read("config.ini")

                    # config.set raises NoSectionError on a missing section.
                    for section in ("audio", "app"):
                        if not config.has_section(section):
                            config.add_section(section)

                    # Persist the audio values regardless of which branch
                    # applied them, so the dialog shows them next time.
                    config.set("audio", "silence_threshold", str(threshold))
                    config.set("audio", "silence_duration", str(duration))
                    config.set("audio", "buffer_duration", str(buffer_length))

                    # Theme setting.
                    selected_theme = theme_radio.value or "light"
                    config.set("app", "theme", selected_theme)
                    with open("config.ini", "w") as config_file:
                        config.write(config_file)

                    # Apply the theme.
                    page.theme_mode = ft.ThemeMode.LIGHT if selected_theme.lower() == "light" else ft.ThemeMode.DARK

                    close_dialog()

                    # Confirmation message.
                    page.snack_bar = ft.SnackBar(ft.Text("설정이 저장되었습니다"))
                    page.snack_bar.open = True
                    page.update()
                except Exception as e:
                    print(f"설정 저장 오류: {e}")
                    page.snack_bar = ft.SnackBar(ft.Text(f"설정 저장 오류: {e}"))
                    page.snack_bar.open = True
                    page.update()

            # Settings dialog.
            dialog = ft.AlertDialog(
                title=ft.Text("설정"),
                content=ft.Column([
                    ft.Text("API 설정", weight=ft.FontWeight.BOLD),
                    ft.Text("OpenAI API (백업용)", size=14),
                    openai_api_key_field,
                    ft.Divider(height=10),
                    ft.Text("Hugging Face API (권장)", size=14),
                    hf_api_key_field,

                    ft.Divider(height=20),
                    ft.Text("음성인식 모델 설정", weight=ft.FontWeight.BOLD),
                    ft.Text("API 제공자 선택", size=14),
                    model_provider_radio,
                    ft.Text("음성인식 모델 선택", size=14),
                    model_dropdown,

                    ft.Divider(height=20),
                    ft.Text("오디오 설정", weight=ft.FontWeight.BOLD),
                    silence_threshold_text,
                    silence_threshold_slider,
                    silence_duration_text,
                    silence_duration_slider,
                    buffer_duration_text,
                    buffer_duration_slider,

                    ft.Divider(height=20),
                    ft.Text("앱 설정", weight=ft.FontWeight.BOLD),
                    ft.Text("테마", size=14),
                    theme_radio,
                ], scroll=ft.ScrollMode.AUTO, height=600),
                actions=[
                    ft.TextButton("취소", on_click=close_dialog),
                    ft.TextButton("저장", on_click=save_settings),
                ],
                actions_alignment=ft.MainAxisAlignment.END,
            )

            # Show the dialog.
            page.dialog = dialog
            dialog.open = True
            page.update()
        except Exception as e:
            print(f"설정 대화상자 표시 오류: {e}")

# When run as a script, start the Flet app with main() as its target.
if __name__ == "__main__":
    ft.app(target=main)