""" 동적 메모리 기반 세션풀 관리 시스템 - GPU 메모리 자동 감지 - LAMA:MIGAN 비율 기반 최적 세션풀 구성 - 실시간 메모리 모니터링 및 동적 조정 """ import logging import time from typing import Dict, Tuple, Optional, Any from dataclasses import dataclass from enum import Enum import torch import psutil class GPUTier(Enum): """GPU 등급 분류""" RTX_3050_4GB = "rtx_3050_4gb" RTX_3060_12GB = "rtx_3060_12gb" RTX_3070_8GB = "rtx_3070_8gb" RTX_3080_10GB = "rtx_3080_10gb" RTX_3090_24GB = "rtx_3090_24gb" RTX_4060_16GB = "rtx_4060_16gb" RTX_4070_12GB = "rtx_4070_12gb" RTX_4080_16GB = "rtx_4080_16gb" RTX_4090_24GB = "rtx_4090_24gb" UNKNOWN = "unknown" @dataclass class SessionPoolConfig: """세션풀 구성""" migan_sessions: int lama_sessions: int ocr_sessions: int workers: int total_vram_usage_mb: int safety_margin_mb: int expected_concurrent_clients: int @property def total_sessions(self) -> int: return self.migan_sessions + self.lama_sessions + self.ocr_sessions @property def lama_migan_ratio(self) -> str: total = self.lama_sessions + self.migan_sessions if total == 0: return "0:0" lama_pct = round((self.lama_sessions / total) * 10) migan_pct = 10 - lama_pct return f"{lama_pct}:{migan_pct}" @property def worker_session_ratio(self) -> float: if self.total_sessions == 0: return 0.0 return self.workers / self.total_sessions @dataclass class MemoryInfo: """메모리 정보""" total_vram_mb: int available_vram_mb: int used_vram_mb: int gpu_name: str gpu_tier: GPUTier cuda_capability: str supports_fp16: bool supports_tensor_cores: bool class DynamicSessionPoolManager: """동적 세션풀 관리자""" # 🎯 세션별 메모리 사용량 (MB) MEMORY_USAGE = { 'migan_session': 1200, # MIGAN 웜업 후 메모리 'lama_session': 500, # LAMA 세션 메모리 'ocr_session': 400, # OCR 세션 메모리 'system_overhead': 500, # 시스템 오버헤드 } # 🎯 GPU별 최적 구성 템플릿 GPU_TEMPLATES = { GPUTier.RTX_4080_16GB: { 'target_vram_usage_pct': 70, 'lama_migan_ratio': (4, 6), # 4:6 'worker_session_ratio': 3.0, 'max_concurrent_clients': 50, }, GPUTier.RTX_3090_24GB: { 'target_vram_usage_pct': 70, 'lama_migan_ratio': (3, 7), # 3:7 'worker_session_ratio': 3.0, 'max_concurrent_clients': 70, }, GPUTier.RTX_3080_10GB: { 'target_vram_usage_pct': 65, 'lama_migan_ratio': (5, 5), # 5:5 'worker_session_ratio': 3.5, 'max_concurrent_clients': 30, }, GPUTier.RTX_3070_8GB: { 'target_vram_usage_pct': 60, 'lama_migan_ratio': (6, 4), # 6:4 (LAMA 중심) 'worker_session_ratio': 4.0, 'max_concurrent_clients': 20, }, GPUTier.RTX_3050_4GB: { 'target_vram_usage_pct': 55, 'lama_migan_ratio': (8, 2), # 8:2 (LAMA 중심) 'worker_session_ratio': 5.0, 'max_concurrent_clients': 10, }, } def __init__(self, logger: Optional[logging.Logger] = None): self.logger = logger or logging.getLogger(__name__) self._memory_info: Optional[MemoryInfo] = None self._current_config: Optional[SessionPoolConfig] = None self._last_memory_check = 0 self._memory_check_interval = 30 # 30초마다 메모리 체크 def detect_gpu_info(self) -> MemoryInfo: """GPU 정보 자동 감지""" if not torch.cuda.is_available(): raise RuntimeError("CUDA GPU를 사용할 수 없습니다") device = torch.cuda.current_device() props = torch.cuda.get_device_properties(device) # GPU 메모리 정보 total_memory = torch.cuda.get_device_properties(device).total_memory total_vram_mb = int(total_memory / (1024 * 1024)) # 현재 사용 중인 메모리 torch.cuda.empty_cache() # 캐시 정리 allocated = torch.cuda.memory_allocated(device) reserved = torch.cuda.memory_reserved(device) used_vram_mb = int(max(allocated, reserved) / (1024 * 1024)) available_vram_mb = total_vram_mb - used_vram_mb # GPU 이름 및 등급 판정 gpu_name = props.name gpu_tier = self._classify_gpu_tier(gpu_name, total_vram_mb) # CUDA 기능 확인 capability = f"{props.major}.{props.minor}" supports_fp16 = props.major >= 7 # Volta 이상 supports_tensor_cores = props.major >= 7 memory_info = MemoryInfo( total_vram_mb=total_vram_mb, available_vram_mb=available_vram_mb, used_vram_mb=used_vram_mb, gpu_name=gpu_name, gpu_tier=gpu_tier, cuda_capability=capability, supports_fp16=supports_fp16, supports_tensor_cores=supports_tensor_cores ) self.logger.info(f"🔍 GPU 감지: {gpu_name} ({gpu_tier.value})") self.logger.info(f"💾 VRAM: {total_vram_mb}MB 총용량, {available_vram_mb}MB 사용가능") self.logger.info(f"⚡ CUDA {capability}, FP16: {supports_fp16}, TensorCores: {supports_tensor_cores}") self._memory_info = memory_info return memory_info def _classify_gpu_tier(self, gpu_name: str, total_vram_mb: int) -> GPUTier: """GPU 이름과 메모리로 등급 분류""" gpu_name_lower = gpu_name.lower() # RTX 40 시리즈 if "4090" in gpu_name_lower: return GPUTier.RTX_4090_24GB elif "4080" in gpu_name_lower: return GPUTier.RTX_4080_16GB elif "4070" in gpu_name_lower: return GPUTier.RTX_4070_12GB elif "4060" in gpu_name_lower: return GPUTier.RTX_4060_16GB # RTX 30 시리즈 elif "3090" in gpu_name_lower: return GPUTier.RTX_3090_24GB elif "3080" in gpu_name_lower: return GPUTier.RTX_3080_10GB elif "3070" in gpu_name_lower: return GPUTier.RTX_3070_8GB elif "3060" in gpu_name_lower: return GPUTier.RTX_3060_12GB elif "3050" in gpu_name_lower: return GPUTier.RTX_3050_4GB # 메모리 기반 추정 elif total_vram_mb >= 22000: # 22GB+ return GPUTier.RTX_3090_24GB elif total_vram_mb >= 15000: # 15GB+ return GPUTier.RTX_4080_16GB elif total_vram_mb >= 11000: # 11GB+ return GPUTier.RTX_3060_12GB elif total_vram_mb >= 9000: # 9GB+ return GPUTier.RTX_3080_10GB elif total_vram_mb >= 7000: # 7GB+ return GPUTier.RTX_3070_8GB elif total_vram_mb >= 3500: # 3.5GB+ return GPUTier.RTX_3050_4GB else: return GPUTier.UNKNOWN def calculate_optimal_config(self, custom_lama_migan_ratio: Optional[Tuple[int, int]] = None, custom_worker_session_ratio: Optional[float] = None) -> SessionPoolConfig: """최적 세션풀 구성 계산""" if not self._memory_info: self.detect_gpu_info() memory_info = self._memory_info gpu_tier = memory_info.gpu_tier # GPU별 템플릿 가져오기 if gpu_tier in self.GPU_TEMPLATES: template = self.GPU_TEMPLATES[gpu_tier].copy() else: # 알 수 없는 GPU의 경우 보수적 설정 template = { 'target_vram_usage_pct': 50, 'lama_migan_ratio': (7, 3), 'worker_session_ratio': 4.0, 'max_concurrent_clients': 15, } self.logger.warning(f"⚠️ 알 수 없는 GPU: {memory_info.gpu_name}, 보수적 설정 적용") # 사용자 지정 비율 적용 if custom_lama_migan_ratio: template['lama_migan_ratio'] = custom_lama_migan_ratio if custom_worker_session_ratio: template['worker_session_ratio'] = custom_worker_session_ratio # 사용 가능한 VRAM 계산 target_usage_mb = int(memory_info.available_vram_mb * template['target_vram_usage_pct'] / 100) safety_margin_mb = memory_info.available_vram_mb - target_usage_mb # 시스템 오버헤드 제외 available_for_sessions = target_usage_mb - self.MEMORY_USAGE['system_overhead'] if available_for_sessions <= 0: raise RuntimeError(f"❌ 세션풀 생성에 필요한 메모리가 부족합니다: {available_for_sessions}MB") # LAMA:MIGAN 비율 적용 lama_ratio, migan_ratio = template['lama_migan_ratio'] total_ratio = lama_ratio + migan_ratio # 세션 개수 계산 migan_memory_budget = int(available_for_sessions * migan_ratio / total_ratio) lama_memory_budget = int(available_for_sessions * lama_ratio / total_ratio) migan_sessions = max(1, migan_memory_budget // self.MEMORY_USAGE['migan_session']) lama_sessions = max(1, lama_memory_budget // self.MEMORY_USAGE['lama_session']) # OCR 세션 (남은 메모리로) used_memory = (migan_sessions * self.MEMORY_USAGE['migan_session'] + lama_sessions * self.MEMORY_USAGE['lama_session']) remaining_memory = available_for_sessions - used_memory ocr_sessions = max(2, remaining_memory // self.MEMORY_USAGE['ocr_session']) # 워커 수 계산 total_sessions = migan_sessions + lama_sessions + ocr_sessions workers = int(total_sessions * template['worker_session_ratio']) # 총 메모리 사용량 계산 total_usage = (migan_sessions * self.MEMORY_USAGE['migan_session'] + lama_sessions * self.MEMORY_USAGE['lama_session'] + ocr_sessions * self.MEMORY_USAGE['ocr_session'] + self.MEMORY_USAGE['system_overhead']) config = SessionPoolConfig( migan_sessions=migan_sessions, lama_sessions=lama_sessions, ocr_sessions=ocr_sessions, workers=workers, total_vram_usage_mb=total_usage, safety_margin_mb=safety_margin_mb, expected_concurrent_clients=template['max_concurrent_clients'] ) self._current_config = config # 설정 로깅 self.logger.info(f"🎯 최적 세션풀 구성 계산 완료:") self.logger.info(f" 💾 VRAM 사용: {total_usage}MB / {memory_info.available_vram_mb}MB ({template['target_vram_usage_pct']}%)") self.logger.info(f" 🔄 세션풀: MIGAN {migan_sessions}개, LAMA {lama_sessions}개, OCR {ocr_sessions}개") self.logger.info(f" ⚡ 비율: LAMA:MIGAN = {config.lama_migan_ratio}, 워커:세션 = {config.worker_session_ratio:.1f}:1") self.logger.info(f" 👥 예상 동시 클라이언트: {config.expected_concurrent_clients}명") return config def get_current_memory_status(self) -> Dict[str, Any]: """현재 메모리 상태 확인""" if not torch.cuda.is_available(): return {"error": "CUDA 사용 불가"} device = torch.cuda.current_device() allocated = torch.cuda.memory_allocated(device) reserved = torch.cuda.memory_reserved(device) total = torch.cuda.get_device_properties(device).total_memory return { "allocated_mb": int(allocated / (1024 * 1024)), "reserved_mb": int(reserved / (1024 * 1024)), "total_mb": int(total / (1024 * 1024)), "free_mb": int((total - max(allocated, reserved)) / (1024 * 1024)), "utilization_pct": round(max(allocated, reserved) / total * 100, 1) } def monitor_and_adjust(self) -> bool: """메모리 모니터링 및 동적 조정""" current_time = time.time() # 주기적 체크만 수행 if current_time - self._last_memory_check < self._memory_check_interval: return False self._last_memory_check = current_time # 현재 메모리 상태 확인 memory_status = self.get_current_memory_status() if "error" in memory_status: return False # 메모리 사용률이 90% 초과 시 경고 if memory_status["utilization_pct"] > 90: self.logger.warning(f"⚠️ 높은 VRAM 사용률: {memory_status['utilization_pct']}%") self.logger.warning(f" 사용량: {memory_status['allocated_mb']}MB / {memory_status['total_mb']}MB") return True # 메모리 사용률이 95% 초과 시 긴급 정리 if memory_status["utilization_pct"] > 95: self.logger.error(f"🚨 긴급: VRAM 사용률 {memory_status['utilization_pct']}% - 캐시 정리 실행") torch.cuda.empty_cache() return True return False def generate_config_summary(self) -> str: """구성 요약 생성""" if not self._current_config or not self._memory_info: return "❌ 구성 정보 없음" config = self._current_config memory = self._memory_info summary = f""" 🚀 동적 세션풀 구성 요약 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 🔧 GPU: {memory.gpu_name} ({memory.gpu_tier.value}) 💾 VRAM: {memory.total_vram_mb}MB (사용가능: {memory.available_vram_mb}MB) ⚡ 기능: FP16={memory.supports_fp16}, TensorCores={memory.supports_tensor_cores} 🎯 세션풀 구성: - MIGAN: {config.migan_sessions}개 ({config.migan_sessions * 1200}MB) - LAMA: {config.lama_sessions}개 ({config.lama_sessions * 500}MB) - OCR: {config.ocr_sessions}개 ({config.ocr_sessions * 400}MB) - 워커: {config.workers}개 📊 성능 지표: - LAMA:MIGAN 비율: {config.lama_migan_ratio} - 워커:세션 비율: {config.worker_session_ratio:.1f}:1 - 예상 동시 클라이언트: {config.expected_concurrent_clients}명 - VRAM 사용률: {config.total_vram_usage_mb}MB ({config.total_vram_usage_mb/memory.total_vram_mb*100:.1f}%) - 안전 여유분: {config.safety_margin_mb}MB ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ """ return summary # 전역 인스턴스 session_pool_manager = DynamicSessionPoolManager() def get_optimal_session_config(lama_migan_ratio: Optional[Tuple[int, int]] = None, worker_session_ratio: Optional[float] = None) -> SessionPoolConfig: """최적 세션풀 구성 가져오기""" return session_pool_manager.calculate_optimal_config( custom_lama_migan_ratio=lama_migan_ratio, custom_worker_session_ratio=worker_session_ratio ) def print_config_summary(): """구성 요약 출력""" print(session_pool_manager.generate_config_summary()) if __name__ == "__main__": # 테스트 실행 logging.basicConfig(level=logging.INFO) print("🔍 GPU 자동 감지 및 최적 구성 계산...") # 기본 구성 config1 = get_optimal_session_config() # 4:6 비율 적용 config2 = get_optimal_session_config(lama_migan_ratio=(4, 6), worker_session_ratio=3.0) # 3:7 비율 적용 config3 = get_optimal_session_config(lama_migan_ratio=(3, 7), worker_session_ratio=3.0) print_config_summary()