# TRNote/modules/speech_recognition.py

import numpy as np
import os
import tempfile
import openai
import configparser
import wave
import struct
import threading
import queue
import time
from typing import Optional, Callable

class SpeechRecognizer:
    """Speech-to-text helper built on the OpenAI Whisper transcription API.

    The class offers three entry points:

    * ``recognize`` - transcribe an in-memory mono audio buffer.
    * ``recognize_file`` - transcribe an audio file on disk.
    * ``start_realtime_recognition`` / ``add_audio_data`` /
      ``stop_realtime_recognition`` - a background worker that transcribes
      queued audio buffers and hands each text result to a callback.

    Errors are reported with ``print`` and signalled to the caller by a
    ``None`` return value; the public methods do not raise for API or
    file-system failures.
    """

    # Request settings shared by every transcription call.
    WHISPER_MODEL = "whisper-1"
    LANGUAGE = "ko"
    # Sample rate written into the WAV header when the caller gives none.
    DEFAULT_SAMPLE_RATE = 16000

    def __init__(self, config_path="config.ini"):
        """Load the API key from an INI file and prepare the worker state.

        Args:
            config_path: Path of the INI file holding ``[api] openai_api_key``.
                A missing file or missing key yields an empty API key and a
                printed warning instead of an exception.
        """
        # Remembered so set_api_key() writes back to the file it was read from.
        self.config_path = config_path
        self.config = configparser.ConfigParser()

        if os.path.exists(config_path):
            self.config.read(config_path)

        self.api_key = self.config.get("api", "openai_api_key", fallback="")

        if not self.api_key or self.api_key == "your_openai_api_key_here":
            print("경고: OpenAI API 키가 설정되지 않았습니다. config.ini 파일을 확인하세요.")

        # Module-level key, used by the non-client call styles of the library.
        openai.api_key = self.api_key

        # State for the background (real-time) recognition worker.
        self.is_processing = False
        self.audio_queue = queue.Queue()
        self.process_thread = None
        self.callback = None

    def recognize(self, audio_data: np.ndarray,
                  sample_rate: int = DEFAULT_SAMPLE_RATE) -> Optional[str]:
        """Transcribe an in-memory mono audio buffer.

        The samples are written to a temporary 16-bit PCM WAV file, uploaded
        for transcription, and the temporary file is always removed
        afterwards, whatever the outcome.

        Args:
            audio_data: Audio samples. Floating-point data is treated as
                normalised to [-1.0, 1.0] and clipped to that range before
                scaling; ``int16`` data is written unchanged.
            sample_rate: Sample rate in Hz recorded in the WAV header.

        Returns:
            The recognised text, or ``None`` when the input is empty, no API
            key is configured, or any step fails.
        """
        if audio_data is None or len(audio_data) == 0:
            return None

        # Checked before touching the disk so nothing needs cleaning up.
        if not self.api_key:
            print("API 키가 설정되지 않았습니다.")
            return None

        wav_path = None
        try:
            # mkstemp + close gives a path with no lingering open handle, so
            # the file can be reopened and later deleted on every platform.
            fd, wav_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            self._write_wav(wav_path, audio_data, sample_rate)

            with open(wav_path, "rb") as audio_file:
                try:
                    return self._transcribe(audio_file)
                except Exception as e:
                    print(f"API 호출 오류: {e}")
                    return None
        except Exception as e:
            print(f"음성 인식 오류: {e}")
            return None
        finally:
            # Runs after the `with` above has closed the file.
            if wav_path is not None:
                self._remove_file(wav_path)

    def recognize_file(self, file_path: str) -> Optional[str]:
        """Transcribe an audio file on disk.

        Args:
            file_path: Path of the audio file to upload.

        Returns:
            The recognised text, or ``None`` when the file does not exist, no
            API key is configured, or the request fails.
        """
        if not os.path.exists(file_path):
            print(f"파일이 존재하지 않습니다: {file_path}")
            return None

        if not self.api_key:
            print("API 키가 설정되지 않았습니다.")
            return None

        try:
            with open(file_path, "rb") as audio_file:
                try:
                    return self._transcribe(audio_file)
                except Exception as e:
                    print(f"파일 API 호출 오류: {e}")
                    return None
        except Exception as e:
            print(f"파일 음성 인식 오류: {e}")
            return None

    def _transcribe(self, audio_file) -> Optional[str]:
        """Send an open binary audio file to Whisper and return the text.

        Three call styles of the ``openai`` package are tried in order:
        the ``openai.OpenAI`` client, the module-level
        ``openai.audio.transcriptions`` interface, and the older
        ``openai.Audio.transcribe`` class method. Only *locating* the entry
        point is guarded; once one is found, errors raised by the request
        itself propagate to the caller instead of triggering another upload
        with a partially consumed file object.

        Returns:
            The transcription text, or ``None`` (after printing a message)
            when none of the call styles is available.
        """
        resolvers = (
            lambda: openai.OpenAI(api_key=self.api_key).audio.transcriptions.create,
            lambda: openai.audio.transcriptions.create,
            lambda: openai.Audio.transcribe,
        )

        for resolve in resolvers:
            try:
                transcribe = resolve()
            except (AttributeError, ImportError, NameError):
                # This call style does not exist in the installed library.
                continue

            result = transcribe(
                model=self.WHISPER_MODEL,
                file=audio_file,
                language=self.LANGUAGE,
                response_format="text",
            )
            # Depending on the call style the result is a plain string, a
            # dict-like object, or an object exposing a `.text` attribute.
            if isinstance(result, str):
                return result
            if isinstance(result, dict):
                return result.get("text", "")
            return result.text

        print("OpenAI API 호출 방식을 찾을 수 없습니다.")
        return None

    @staticmethod
    def _write_wav(path: str, audio_data: np.ndarray, sample_rate: int) -> None:
        """Write samples to ``path`` as a mono 16-bit PCM WAV file."""
        samples = np.asarray(audio_data)
        if samples.dtype != np.int16:
            if not np.issubdtype(samples.dtype, np.floating):
                samples = samples.astype(np.float32)
            # Clip first: out-of-range values would otherwise wrap around
            # when cast to int16 and produce loud artefacts.
            samples = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

        with wave.open(path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 16-bit
            wf.setframerate(sample_rate)
            wf.writeframes(samples.tobytes())

    @staticmethod
    def _remove_file(path: str) -> None:
        """Delete ``path``, ignoring file-system errors (best-effort cleanup)."""
        try:
            os.unlink(path)
        except OSError:
            # Cleanup must never mask the result of the recognition itself.
            pass

    def start_realtime_recognition(self, result_callback: Callable[[str], None]):
        """Start the background worker that transcribes queued audio.

        Does nothing if the worker is already running.

        Args:
            result_callback: Called from the worker thread with each
                non-empty transcription result.
        """
        if self.is_processing:
            return

        self.is_processing = True
        self.callback = result_callback

        # Daemon thread: it must not keep the interpreter alive on exit.
        self.process_thread = threading.Thread(
            target=self._process_audio_queue, daemon=True
        )
        self.process_thread.start()

    def stop_realtime_recognition(self):
        """Ask the background worker to stop and discard queued audio.

        NOTE: the worker thread is not joined here; it leaves its loop the
        next time it re-checks ``is_processing`` (after the current queue
        wait or in-flight recognition finishes).
        """
        self.is_processing = False

        # Drop audio that has not been processed yet.
        while not self.audio_queue.empty():
            try:
                self.audio_queue.get_nowait()
            except queue.Empty:
                break

        self.process_thread = None

    def add_audio_data(self, audio_data: np.ndarray):
        """Queue an audio buffer for the worker; ignored while stopped."""
        if self.is_processing:
            self.audio_queue.put(audio_data)

    def _process_audio_queue(self):
        """Worker loop: transcribe queued buffers until processing stops."""
        while self.is_processing:
            try:
                # Wait at most one second so the stop flag is re-checked.
                audio_data = self.audio_queue.get(timeout=1.0)

                result = self.recognize(audio_data)

                # Only non-empty results are forwarded to the callback.
                if result and self.callback:
                    self.callback(result)

            except queue.Empty:
                # Nothing queued within the timeout; keep polling.
                time.sleep(0.1)
                continue
            except Exception as e:
                print(f"오디오 처리 오류: {e}")
                time.sleep(0.5)  # brief back-off after an error

    def set_api_key(self, api_key: str):
        """Set the OpenAI API key and persist it to the configuration file.

        The key is stored under ``[api] openai_api_key``; the section is
        created when the loaded configuration does not have it yet. The file
        written is the one given to ``__init__``. A failure to write the
        file is printed, not raised.

        Args:
            api_key: The new API key.
        """
        self.api_key = api_key
        openai.api_key = api_key

        # ConfigParser.set() raises NoSectionError for an unknown section,
        # which happens whenever the config file was missing at start-up.
        if not self.config.has_section("api"):
            self.config.add_section("api")
        self.config.set("api", "openai_api_key", api_key)

        try:
            with open(self.config_path, "w") as config_file:
                self.config.write(config_file)
        except Exception as e:
            print(f"설정 파일 저장 오류: {e}")