# TRNote/modules/audio_source.py

import sounddevice as sd
import numpy as np
import threading
import time
import configparser
import os
from typing import Callable, List, Dict, Any

class AudioSource:
    """Capture audio from an input device and hand voiced chunks to callbacks.

    Audio blocks whose level exceeds ``silence_threshold`` are accumulated in
    two buffers:

    * ``buffer`` -- delivered to ``callback`` once it holds ``buffer_duration``
      seconds of samples, or after a short pause in the sound.
    * ``realtime_buffer`` -- a rolling window of the latest
      ``realtime_buffer_size`` samples, delivered to ``realtime_callback``
      at most once every ``realtime_interval`` seconds.

    User callbacks are invoked directly on the audio-callback thread or on the
    silence-detection thread, so they should return quickly.
    """

    # Seconds without sound after which a partially filled buffer is flushed.
    _FLUSH_AFTER_SILENCE_SECONDS = 1.0

    def __init__(self, config_path="config.ini"):
        """Load audio settings from ``config_path`` (missing file -> defaults)."""
        # Remember the path so update_settings() writes back to the same file.
        self.config_path = config_path
        self.config = configparser.ConfigParser()

        if os.path.exists(config_path):
            self.config.read(config_path)

        self.is_recording = False
        self.stream = None
        self.callback = None
        self.realtime_callback = None
        self.thread = None

        # Guards buffer / realtime_buffer / timestamps, which are touched by
        # both the audio callback and the silence-detection thread.
        self._buffer_lock = threading.Lock()
        # Per-session stop flag for the silence-detection thread.
        self._stop_event = None

        # Audio settings, with fallbacks used when the option is absent.
        self.silence_threshold = self.config.getfloat("audio", "silence_threshold", fallback=0.02)
        self.silence_duration = self.config.getfloat("audio", "silence_duration", fallback=60)
        self.buffer_duration = self.config.getfloat("audio", "buffer_duration", fallback=3)
        self.sample_rate = self.config.getint("audio", "sample_rate", fallback=16000)

        self.last_sound_time = 0
        self.buffer = np.array([])
        self.realtime_buffer = np.array([])
        self.realtime_buffer_size = int(self.sample_rate * 2)  # 2-second window
        self.last_realtime_process = 0
        self.realtime_interval = 2.0  # run realtime processing every 2 seconds

    def get_available_sources(self) -> List[Dict[str, Any]]:
        """Return every device with input channels plus a system-output entry.

        Each entry is a dict with ``id``, ``name``, ``channels`` and ``type``.
        If the device query fails, a single default-microphone entry is
        returned instead of raising.
        """
        try:
            sources = [
                {
                    'id': index,
                    'name': device['name'],
                    'channels': device['max_input_channels'],
                    'type': self._determine_device_type(device['name']),
                }
                for index, device in enumerate(sd.query_devices())
                if isinstance(device, dict) and device.get('max_input_channels', 0) > 0
            ]

            # Extra pseudo-source for capturing the system sound output.
            sources.append({
                'id': 'wasapi_loopback',
                'name': '시스템 소리 출력 (WASAPI Loopback)',
                'channels': 2,
                'type': 'system_output'
            })

            return sources
        except Exception as e:
            print(f"오디오 소스 목록 가져오기 오류: {e}")
            # Still offer a default source so the caller has something to pick.
            return [{
                'id': 0,
                'name': '기본 마이크',
                'channels': 1,
                'type': 'microphone'
            }]

    def _determine_device_type(self, name: str) -> str:
        """Classify a device by keywords in its name.

        Returns ``'microphone'``, ``'line_in'``, ``'speaker'`` or ``'unknown'``.
        """
        name_lower = name.lower()

        # 'mic' also matches 'microphone'.
        if 'mic' in name_lower:
            return 'microphone'
        # The original additionally required 'in', which is always true once
        # 'line' matches ('in' is a substring of 'line'); behaviour is the same.
        if 'line' in name_lower:
            return 'line_in'
        if 'speaker' in name_lower or 'headphone' in name_lower:
            return 'speaker'
        return 'unknown'

    def _open_default_stream(self):
        """Open a mono input stream on the default input device."""
        return sd.InputStream(
            samplerate=self.sample_rate,
            channels=1,
            callback=self._audio_callback
        )

    def _open_stream(self, source_id):
        """Open the stream for ``source_id``, falling back to the default device.

        Raises whatever the fallback open raises if that fails as well.
        """
        if source_id == 'wasapi_loopback':
            try:
                device_idx = None

                # Pick the first output-capable device on host API 0.
                # NOTE(review): this assumes host API 0 is the one wanted for
                # loopback capture and that the '<index>:loopback' device
                # string is accepted by the installed sounddevice/PortAudio
                # build -- confirm. If the open fails, the default input
                # device is used instead (see the except branch).
                for i, device in enumerate(sd.query_devices()):
                    if (isinstance(device, dict)
                            and device.get('hostapi', None) == 0
                            and device.get('max_output_channels', 0) > 0):
                        device_idx = i
                        break

                if device_idx is None:
                    device_idx = sd.default.device[1]  # default output device

                return sd.InputStream(
                    device=f'{device_idx}:loopback',
                    samplerate=self.sample_rate,
                    channels=2,
                    callback=self._audio_callback
                )
            except Exception as e:
                print(f"WASAPI 루프백 설정 오류: {e}")
                return self._open_default_stream()

        # Regular input device.
        try:
            return sd.InputStream(
                device=source_id,
                samplerate=self.sample_rate,
                channels=1,
                callback=self._audio_callback
            )
        except Exception as e:
            print(f"입력 장치 설정 오류: {e}")
            return self._open_default_stream()

    def start_recording(self, source_id, callback: Callable, realtime_callback: Callable = None):
        """Start capturing from ``source_id``.

        Args:
            source_id: A device id from ``get_available_sources()`` or the
                string ``'wasapi_loopback'``.
            callback: Called with a 1-D array when the main buffer is full or
                after a pause in the sound.
            realtime_callback: Optional; called periodically with the rolling
                realtime window.

        Errors are printed rather than raised; on failure ``is_recording`` is
        left ``False`` and any partially opened stream is closed.
        """
        # Also clean up a stream left behind when is_recording was cleared
        # without calling stop_recording().
        if self.is_recording or self.stream is not None:
            self.stop_recording()

        self.callback = callback
        self.realtime_callback = realtime_callback

        now = time.time()
        with self._buffer_lock:
            self.buffer = np.array([])
            self.realtime_buffer = np.array([])
            self.last_sound_time = now
            self.last_realtime_process = now
        self.is_recording = True

        try:
            self.stream = self._open_stream(source_id)
            self.stream.start()

            # Each session gets its own stop flag so a thread from a previous
            # session cannot be revived by is_recording turning True again.
            stop_event = threading.Event()
            self._stop_event = stop_event
            self.thread = threading.Thread(
                target=self._silence_detection_thread,
                args=(stop_event,),
            )
            self.thread.daemon = True
            self.thread.start()

        except Exception as e:
            print(f"녹음 시작 오류: {e}")
            # Releases the stream if it was opened but could not be started.
            self.stop_recording()

    def stop_recording(self):
        """Stop capturing, close the stream and signal the silence thread."""
        self.is_recording = False

        if self._stop_event is not None:
            self._stop_event.set()
            self._stop_event = None

        if self.stream is not None:
            try:
                self.stream.stop()
                self.stream.close()
            except Exception as e:
                print(f"녹음 중지 오류: {e}")
            finally:
                self.stream = None

        self.thread = None

    def _audio_callback(self, indata, frames, time_info, status):
        """Stream callback: buffer voiced audio and dispatch full chunks.

        Args:
            indata: 2-D array of shape (frames, channels) for this block.
            frames: Number of frames in ``indata`` (unused).
            time_info: Timing information supplied by the stream (unused).
                It must not be named ``time``: that would shadow the ``time``
                module used below.
            status: Stream status flags; printed when set.
        """
        if status:
            print(f"상태: {status}")

        try:
            if len(indata) == 0:
                return  # nothing to measure; avoids dividing by zero below

            # Down-mix to mono when needed. The single-channel slice is a
            # view into the caller's buffer, so copy it before keeping it.
            if indata.shape[1] > 1:
                audio_data = np.mean(indata, axis=1)
            else:
                audio_data = indata[:, 0].copy()

            # Level measure compared against silence_threshold.
            volume_norm = np.linalg.norm(audio_data) / len(audio_data)
            if volume_norm <= self.silence_threshold:
                return

            now = time.time()
            realtime_cb = self.realtime_callback
            main_cb = self.callback
            realtime_chunk = None
            full_chunk = None

            with self._buffer_lock:
                self.last_sound_time = now

                if len(self.buffer) == 0:
                    self.buffer = audio_data
                else:
                    self.buffer = np.append(self.buffer, audio_data)

                if len(self.realtime_buffer) == 0:
                    self.realtime_buffer = audio_data
                else:
                    self.realtime_buffer = np.append(self.realtime_buffer, audio_data)

                # Keep only the newest samples in the rolling window.
                if len(self.realtime_buffer) > self.realtime_buffer_size:
                    self.realtime_buffer = self.realtime_buffer[-self.realtime_buffer_size:]

                # Realtime processing at a fixed minimum interval.
                if realtime_cb and (now - self.last_realtime_process) > self.realtime_interval:
                    self.last_realtime_process = now
                    realtime_chunk = self.realtime_buffer.copy()

                # Main buffer reached its maximum length: hand it off and reset.
                if len(self.buffer) >= self.sample_rate * self.buffer_duration:
                    full_chunk = self.buffer.copy()
                    self.buffer = np.array([])

            # Callbacks run outside the lock (directly on this thread, not on
            # a worker) so a slow callback cannot block the silence thread's
            # access to the buffers.
            if realtime_chunk is not None:
                realtime_cb(realtime_chunk)
            if full_chunk is not None and main_cb:
                main_cb(full_chunk)
        except Exception as e:
            print(f"오디오 처리 오류: {e}")

    def _flush_on_silence(self, now):
        """Deliver pending audio if no sound arrived for the flush interval.

        The buffers are detached under the lock *before* the callbacks run,
        so samples captured while a callback is executing are not discarded.
        """
        with self._buffer_lock:
            if len(self.buffer) == 0:
                return
            if (now - self.last_sound_time) <= self._FLUSH_AFTER_SILENCE_SECONDS:
                return

            main_cb = self.callback
            realtime_cb = self.realtime_callback

            pending = self.buffer.copy()
            self.buffer = np.array([])

            # The realtime window is flushed only when a realtime callback
            # is registered and there is data to deliver.
            realtime_pending = None
            if realtime_cb and len(self.realtime_buffer) > 0:
                realtime_pending = self.realtime_buffer.copy()
                self.realtime_buffer = np.array([])

        if main_cb:
            main_cb(pending)
        if realtime_pending is not None:
            realtime_cb(realtime_pending)

    def _silence_detection_thread(self, stop_event=None):
        """Background loop that flushes buffered audio after a pause.

        Args:
            stop_event: Event that ends the loop when set. When omitted, the
                loop ends only when ``is_recording`` becomes false.
        """
        if stop_event is None:
            stop_event = threading.Event()

        while self.is_recording and not stop_event.is_set():
            try:
                current_time = time.time()

                self._flush_on_silence(current_time)

                # A long silence is treated as the start of a new
                # conversation; no action is implemented for it yet.
                if (current_time - self.last_sound_time) > self.silence_duration:
                    pass

                # Sleep between polls, waking immediately on stop.
                stop_event.wait(0.5)
            except Exception as e:
                print(f"침묵 감지 오류: {e}")
                stop_event.wait(1)  # back off longer after an error

    def update_settings(self, silence_threshold=None, silence_duration=None, buffer_duration=None):
        """Update audio settings in memory and persist them to the config file.

        Arguments left as ``None`` are not changed. The file is written to the
        path given at construction time (``"config.ini"`` by default).
        """
        updates = {
            "silence_threshold": silence_threshold,
            "silence_duration": silence_duration,
            "buffer_duration": buffer_duration,
        }

        for option, value in updates.items():
            if value is None:
                continue
            setattr(self, option, value)
            # The section is absent when the config file did not exist or
            # had no [audio] block; set() would raise NoSectionError.
            if not self.config.has_section("audio"):
                self.config.add_section("audio")
            self.config.set("audio", option, str(value))

        # Persist to the same file the settings were loaded from.
        with open(self.config_path, "w") as config_file:
            self.config.write(config_file)