# ImageProcessor_MainServer/worker/dynamic_session_pool_manage...
# (418 lines, 16 KiB, Python)

"""
동적 메모리 기반 세션풀 관리 시스템
- GPU 메모리 자동 감지
- LAMA:MIGAN 비율 기반 최적 세션풀 구성
- 실시간 메모리 모니터링 및 동적 조정
"""
import logging
import time
from typing import Dict, Tuple, Optional, Any
from dataclasses import dataclass
from enum import Enum
import torch
import psutil
class GPUTier(Enum):
    """GPU tier classification.

    Each member names a GPU model together with its nominal VRAM size; the
    value is the lower-case string form of the member name. ``UNKNOWN`` is
    the catch-all for devices that match no other tier.
    """

    RTX_3050_4GB = "rtx_3050_4gb"
    RTX_3060_12GB = "rtx_3060_12gb"
    RTX_3070_8GB = "rtx_3070_8gb"
    RTX_3080_10GB = "rtx_3080_10gb"
    RTX_3090_24GB = "rtx_3090_24gb"
    RTX_4060_16GB = "rtx_4060_16gb"
    RTX_4070_12GB = "rtx_4070_12gb"
    RTX_4080_16GB = "rtx_4080_16gb"
    RTX_4090_24GB = "rtx_4090_24gb"
    UNKNOWN = "unknown"
@dataclass
class SessionPoolConfig:
    """Session-pool configuration.

    Holds the number of sessions per model, the worker count and the VRAM
    figures (in MB) that belong to one computed pool layout.
    """

    migan_sessions: int  # number of MIGAN sessions
    lama_sessions: int  # number of LAMA sessions
    ocr_sessions: int  # number of OCR sessions
    workers: int  # number of workers
    total_vram_usage_mb: int  # planned VRAM usage for the whole pool, in MB
    safety_margin_mb: int  # VRAM left outside the target budget, in MB
    expected_concurrent_clients: int  # expected number of concurrent clients

    @property
    def total_sessions(self) -> int:
        """Return the combined count of MIGAN, LAMA and OCR sessions."""
        return self.migan_sessions + self.lama_sessions + self.ocr_sessions

    @property
    def lama_migan_ratio(self) -> str:
        """Return the LAMA:MIGAN split as ``"L:M"`` on a 0-10 scale.

        The LAMA share is rounded to the nearest tenth and the MIGAN share is
        the remainder, so the two parts always sum to 10. Returns ``"0:0"``
        when there are neither LAMA nor MIGAN sessions.
        """
        total = self.lama_sessions + self.migan_sessions
        if total == 0:
            return "0:0"
        lama_pct = round((self.lama_sessions / total) * 10)
        migan_pct = 10 - lama_pct
        return f"{lama_pct}:{migan_pct}"

    @property
    def worker_session_ratio(self) -> float:
        """Return workers per session, or ``0.0`` when there are no sessions."""
        if self.total_sessions == 0:
            return 0.0
        return self.workers / self.total_sessions
@dataclass
class MemoryInfo:
    """Memory and capability information for one GPU."""

    total_vram_mb: int  # total device memory, in MB
    available_vram_mb: int  # memory considered available, in MB
    used_vram_mb: int  # memory considered in use, in MB
    gpu_name: str  # device name string
    gpu_tier: GPUTier  # tier the device was classified into
    cuda_capability: str  # compute capability, e.g. "major.minor"
    supports_fp16: bool  # whether FP16 is considered supported
    supports_tensor_cores: bool  # whether tensor cores are considered available
class DynamicSessionPoolManager:
    """Dynamic session-pool manager.

    Detects the current CUDA device, classifies it into a ``GPUTier``, derives
    a ``SessionPoolConfig`` (MIGAN / LAMA / OCR session counts and worker
    count) from the available VRAM, and offers a periodic VRAM usage check.
    """

    # Estimated VRAM cost per item, in MB.
    MEMORY_USAGE = {
        'migan_session': 1200,   # MIGAN memory after warm-up
        'lama_session': 500,     # LAMA session memory
        'ocr_session': 400,      # OCR session memory
        'system_overhead': 500,  # system overhead
    }

    # Per-GPU tuning templates.
    # NOTE(review): tiers without an entry here (e.g. RTX_4090_24GB,
    # RTX_4070_12GB, RTX_4060_16GB, RTX_3060_12GB) take the conservative
    # fallback in calculate_optimal_config and are logged as an unknown GPU;
    # confirm that this is intended.
    GPU_TEMPLATES = {
        GPUTier.RTX_4080_16GB: {
            'target_vram_usage_pct': 70,
            'lama_migan_ratio': (4, 6),  # 4:6
            'worker_session_ratio': 3.0,
            'max_concurrent_clients': 50,
        },
        GPUTier.RTX_3090_24GB: {
            'target_vram_usage_pct': 70,
            'lama_migan_ratio': (3, 7),  # 3:7
            'worker_session_ratio': 3.0,
            'max_concurrent_clients': 70,
        },
        GPUTier.RTX_3080_10GB: {
            'target_vram_usage_pct': 65,
            'lama_migan_ratio': (5, 5),  # 5:5
            'worker_session_ratio': 3.5,
            'max_concurrent_clients': 30,
        },
        GPUTier.RTX_3070_8GB: {
            'target_vram_usage_pct': 60,
            'lama_migan_ratio': (6, 4),  # 6:4 (LAMA-heavy)
            'worker_session_ratio': 4.0,
            'max_concurrent_clients': 20,
        },
        GPUTier.RTX_3050_4GB: {
            'target_vram_usage_pct': 55,
            'lama_migan_ratio': (8, 2),  # 8:2 (LAMA-heavy)
            'worker_session_ratio': 5.0,
            'max_concurrent_clients': 10,
        },
    }

    # Model-number substrings, checked in this order against the lower-cased
    # device name (RTX 40 series first, then RTX 30 series).
    _NAME_MARKERS = (
        ("4090", GPUTier.RTX_4090_24GB),
        ("4080", GPUTier.RTX_4080_16GB),
        ("4070", GPUTier.RTX_4070_12GB),
        ("4060", GPUTier.RTX_4060_16GB),
        ("3090", GPUTier.RTX_3090_24GB),
        ("3080", GPUTier.RTX_3080_10GB),
        ("3070", GPUTier.RTX_3070_8GB),
        ("3060", GPUTier.RTX_3060_12GB),
        ("3050", GPUTier.RTX_3050_4GB),
    )

    # Fallback when the name matches nothing: minimum total VRAM (MB) per
    # tier, checked from the largest threshold down.
    _VRAM_THRESHOLDS = (
        (22000, GPUTier.RTX_3090_24GB),  # 22GB+
        (15000, GPUTier.RTX_4080_16GB),  # 15GB+
        (11000, GPUTier.RTX_3060_12GB),  # 11GB+
        (9000, GPUTier.RTX_3080_10GB),   # 9GB+
        (7000, GPUTier.RTX_3070_8GB),    # 7GB+
        (3500, GPUTier.RTX_3050_4GB),    # 3.5GB+
    )

    _BYTES_PER_MB = 1024 * 1024

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Create a manager with no detected GPU and no computed config.

        Args:
            logger: Logger to use; defaults to this module's logger.
        """
        self.logger = logger or logging.getLogger(__name__)
        self._memory_info: Optional[MemoryInfo] = None
        self._current_config: Optional[SessionPoolConfig] = None
        self._last_memory_check = 0
        self._memory_check_interval = 30  # check memory every 30 seconds

    def detect_gpu_info(self) -> MemoryInfo:
        """Detect the current CUDA device and cache the result.

        Returns:
            A ``MemoryInfo`` for the current device; also stored on the
            instance for later calls.

        Raises:
            RuntimeError: If CUDA is not available.
        """
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA GPU를 사용할 수 없습니다")

        device = torch.cuda.current_device()
        props = torch.cuda.get_device_properties(device)

        # Total device memory (the properties object is fetched only once).
        total_vram_mb = int(props.total_memory / self._BYTES_PER_MB)

        # Memory currently in use, measured after releasing cached blocks.
        # NOTE(review): memory_allocated/memory_reserved report PyTorch's own
        # allocator; memory held by other processes or frameworks is
        # presumably not included - confirm whether that matters here.
        torch.cuda.empty_cache()
        allocated = torch.cuda.memory_allocated(device)
        reserved = torch.cuda.memory_reserved(device)
        used_vram_mb = int(max(allocated, reserved) / self._BYTES_PER_MB)
        available_vram_mb = total_vram_mb - used_vram_mb

        # GPU name and tier.
        gpu_name = props.name
        gpu_tier = self._classify_gpu_tier(gpu_name, total_vram_mb)

        # CUDA capabilities: both flags use the same "major >= 7" test
        # (Volta or newer).
        capability = f"{props.major}.{props.minor}"
        supports_fp16 = props.major >= 7
        supports_tensor_cores = props.major >= 7

        memory_info = MemoryInfo(
            total_vram_mb=total_vram_mb,
            available_vram_mb=available_vram_mb,
            used_vram_mb=used_vram_mb,
            gpu_name=gpu_name,
            gpu_tier=gpu_tier,
            cuda_capability=capability,
            supports_fp16=supports_fp16,
            supports_tensor_cores=supports_tensor_cores,
        )

        self.logger.info(f"🔍 GPU 감지: {gpu_name} ({gpu_tier.value})")
        self.logger.info(f"💾 VRAM: {total_vram_mb}MB 총용량, {available_vram_mb}MB 사용가능")
        self.logger.info(f"⚡ CUDA {capability}, FP16: {supports_fp16}, TensorCores: {supports_tensor_cores}")

        self._memory_info = memory_info
        return memory_info

    def _classify_gpu_tier(self, gpu_name: str, total_vram_mb: int) -> GPUTier:
        """Classify a GPU by its name, falling back to its total VRAM.

        Args:
            gpu_name: Device name; matched case-insensitively by substring.
            total_vram_mb: Total device memory in MB, used only when the name
                contains none of the known model numbers.

        Returns:
            The matching tier, or ``GPUTier.UNKNOWN`` when neither the name
            nor the memory size (below 3500 MB) matches.
        """
        gpu_name_lower = gpu_name.lower()

        for marker, tier in self._NAME_MARKERS:
            if marker in gpu_name_lower:
                return tier

        # Memory-based estimate.
        for min_vram_mb, tier in self._VRAM_THRESHOLDS:
            if total_vram_mb >= min_vram_mb:
                return tier

        return GPUTier.UNKNOWN

    def calculate_optimal_config(self,
                                 custom_lama_migan_ratio: Optional[Tuple[int, int]] = None,
                                 custom_worker_session_ratio: Optional[float] = None) -> SessionPoolConfig:
        """Compute the session-pool configuration for the detected GPU.

        Runs GPU detection first if it has not been done yet. The target
        budget is ``target_vram_usage_pct`` percent of the available VRAM
        minus the fixed system overhead; that budget is split between MIGAN
        and LAMA by the ratio, and what is left goes to OCR. At least one
        MIGAN, one LAMA and two OCR sessions are always configured, even if
        that exceeds the target budget (a warning is logged in that case).

        Args:
            custom_lama_migan_ratio: Optional ``(lama, migan)`` weights that
                override the template ratio. Falsy values are ignored.
            custom_worker_session_ratio: Optional workers-per-session factor
                that overrides the template. Falsy values are ignored.

        Returns:
            The computed ``SessionPoolConfig``; also stored on the instance.

        Raises:
            RuntimeError: If CUDA is unavailable during detection, or if the
                target budget does not even cover the system overhead.
            ValueError: If a ratio part is negative or both parts are zero.
        """
        if self._memory_info is None:
            self.detect_gpu_info()

        memory_info = self._memory_info
        gpu_tier = memory_info.gpu_tier

        # Look up the per-GPU template; copy it so the overrides below never
        # modify the class-level table.
        if gpu_tier in self.GPU_TEMPLATES:
            template = self.GPU_TEMPLATES[gpu_tier].copy()
        else:
            # Conservative settings for GPUs without a template.
            template = {
                'target_vram_usage_pct': 50,
                'lama_migan_ratio': (7, 3),
                'worker_session_ratio': 4.0,
                'max_concurrent_clients': 15,
            }
            self.logger.warning(f"⚠️ 알 수 없는 GPU: {memory_info.gpu_name}, 보수적 설정 적용")

        # Apply caller-supplied overrides.
        if custom_lama_migan_ratio:
            template['lama_migan_ratio'] = custom_lama_migan_ratio
        if custom_worker_session_ratio:
            template['worker_session_ratio'] = custom_worker_session_ratio

        # Target VRAM budget and the margin left outside of it.
        target_usage_mb = int(memory_info.available_vram_mb * template['target_vram_usage_pct'] / 100)
        safety_margin_mb = memory_info.available_vram_mb - target_usage_mb

        # Exclude the fixed system overhead.
        available_for_sessions = target_usage_mb - self.MEMORY_USAGE['system_overhead']
        if available_for_sessions <= 0:
            raise RuntimeError(f"❌ 세션풀 생성에 필요한 메모리가 부족합니다: {available_for_sessions}MB")

        # Apply the LAMA:MIGAN ratio. Reject ratios that would divide by zero
        # or produce negative budgets instead of failing obscurely below.
        lama_ratio, migan_ratio = template['lama_migan_ratio']
        total_ratio = lama_ratio + migan_ratio
        if lama_ratio < 0 or migan_ratio < 0 or total_ratio <= 0:
            raise ValueError(f"LAMA:MIGAN 비율이 잘못되었습니다: {template['lama_migan_ratio']!r}")

        # Session counts per model, with a floor of one session each.
        migan_memory_budget = int(available_for_sessions * migan_ratio / total_ratio)
        lama_memory_budget = int(available_for_sessions * lama_ratio / total_ratio)

        migan_sessions = max(1, migan_memory_budget // self.MEMORY_USAGE['migan_session'])
        lama_sessions = max(1, lama_memory_budget // self.MEMORY_USAGE['lama_session'])

        # OCR sessions take whatever is left, with a floor of two sessions.
        used_memory = (migan_sessions * self.MEMORY_USAGE['migan_session'] +
                       lama_sessions * self.MEMORY_USAGE['lama_session'])
        remaining_memory = available_for_sessions - used_memory
        ocr_sessions = max(2, remaining_memory // self.MEMORY_USAGE['ocr_session'])

        # Worker count.
        total_sessions = migan_sessions + lama_sessions + ocr_sessions
        workers = int(total_sessions * template['worker_session_ratio'])

        # Total planned memory usage.
        total_usage = (migan_sessions * self.MEMORY_USAGE['migan_session'] +
                       lama_sessions * self.MEMORY_USAGE['lama_session'] +
                       ocr_sessions * self.MEMORY_USAGE['ocr_session'] +
                       self.MEMORY_USAGE['system_overhead'])

        # The session floors can push usage past the target budget, in which
        # case safety_margin_mb overstates the real headroom; make it visible.
        if total_usage > target_usage_mb:
            self.logger.warning(f"⚠️ 최소 세션 수 보장으로 목표 VRAM 사용량 초과: {total_usage}MB > {target_usage_mb}MB")

        config = SessionPoolConfig(
            migan_sessions=migan_sessions,
            lama_sessions=lama_sessions,
            ocr_sessions=ocr_sessions,
            workers=workers,
            total_vram_usage_mb=total_usage,
            safety_margin_mb=safety_margin_mb,
            expected_concurrent_clients=template['max_concurrent_clients'],
        )

        self._current_config = config

        # Log the resulting configuration.
        self.logger.info("🎯 최적 세션풀 구성 계산 완료:")
        self.logger.info(f" 💾 VRAM 사용: {total_usage}MB / {memory_info.available_vram_mb}MB ({template['target_vram_usage_pct']}%)")
        self.logger.info(f" 🔄 세션풀: MIGAN {migan_sessions}개, LAMA {lama_sessions}개, OCR {ocr_sessions}")
        self.logger.info(f" ⚡ 비율: LAMA:MIGAN = {config.lama_migan_ratio}, 워커:세션 = {config.worker_session_ratio:.1f}:1")
        self.logger.info(f" 👥 예상 동시 클라이언트: {config.expected_concurrent_clients}")

        return config

    def get_current_memory_status(self) -> Dict[str, Any]:
        """Return the current VRAM usage of the current CUDA device.

        Returns:
            ``{"error": ...}`` when CUDA is unavailable; otherwise a dict with
            ``allocated_mb``, ``reserved_mb``, ``total_mb``, ``free_mb`` and
            ``utilization_pct``. Usage is taken as the larger of the
            allocated and reserved byte counts.
        """
        if not torch.cuda.is_available():
            return {"error": "CUDA 사용 불가"}

        device = torch.cuda.current_device()
        allocated = torch.cuda.memory_allocated(device)
        reserved = torch.cuda.memory_reserved(device)
        total = torch.cuda.get_device_properties(device).total_memory
        in_use = max(allocated, reserved)

        return {
            "allocated_mb": int(allocated / self._BYTES_PER_MB),
            "reserved_mb": int(reserved / self._BYTES_PER_MB),
            "total_mb": int(total / self._BYTES_PER_MB),
            "free_mb": int((total - in_use) / self._BYTES_PER_MB),
            "utilization_pct": round(in_use / total * 100, 1),
        }

    def monitor_and_adjust(self) -> bool:
        """Check VRAM usage, at most once per check interval.

        Above 95% utilization an error is logged and the CUDA cache is
        released; above 90% a warning is logged.

        Returns:
            ``True`` when utilization exceeded 90%; ``False`` when the
            interval has not elapsed, CUDA is unavailable, or usage is fine.
        """
        current_time = time.time()

        # Only run the check periodically.
        if current_time - self._last_memory_check < self._memory_check_interval:
            return False

        self._last_memory_check = current_time

        memory_status = self.get_current_memory_status()
        if "error" in memory_status:
            return False

        utilization_pct = memory_status["utilization_pct"]

        # The stricter threshold must be tested first; otherwise the 90%
        # branch returns before the emergency cleanup can ever run.
        if utilization_pct > 95:
            self.logger.error(f"🚨 긴급: VRAM 사용률 {memory_status['utilization_pct']}% - 캐시 정리 실행")
            torch.cuda.empty_cache()
            return True

        if utilization_pct > 90:
            self.logger.warning(f"⚠️ 높은 VRAM 사용률: {memory_status['utilization_pct']}%")
            self.logger.warning(f" 사용량: {memory_status['allocated_mb']}MB / {memory_status['total_mb']}MB")
            return True

        return False

    def generate_config_summary(self) -> str:
        """Return a human-readable summary of the GPU and pool configuration.

        Returns:
            A multi-line summary string, or a short "no configuration"
            message when no GPU info or no configuration has been computed.
        """
        if not self._current_config or not self._memory_info:
            return "❌ 구성 정보 없음"

        config = self._current_config
        memory = self._memory_info

        # Per-model totals come from MEMORY_USAGE so the summary cannot drift
        # from the figures used when the configuration was computed.
        migan_mb = config.migan_sessions * self.MEMORY_USAGE['migan_session']
        lama_mb = config.lama_sessions * self.MEMORY_USAGE['lama_session']
        ocr_mb = config.ocr_sessions * self.MEMORY_USAGE['ocr_session']

        summary = f"""
🚀 동적 세션풀 구성 요약
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🔧 GPU: {memory.gpu_name} ({memory.gpu_tier.value})
💾 VRAM: {memory.total_vram_mb}MB (사용가능: {memory.available_vram_mb}MB)
⚡ 기능: FP16={memory.supports_fp16}, TensorCores={memory.supports_tensor_cores}
🎯 세션풀 구성:
- MIGAN: {config.migan_sessions}개 ({migan_mb}MB)
- LAMA: {config.lama_sessions}개 ({lama_mb}MB)
- OCR: {config.ocr_sessions}개 ({ocr_mb}MB)
- 워커: {config.workers}
📊 성능 지표:
- LAMA:MIGAN 비율: {config.lama_migan_ratio}
- 워커:세션 비율: {config.worker_session_ratio:.1f}:1
- 예상 동시 클라이언트: {config.expected_concurrent_clients}
- VRAM 사용률: {config.total_vram_usage_mb}MB ({config.total_vram_usage_mb/memory.total_vram_mb*100:.1f}%)
- 안전 여유분: {config.safety_margin_mb}MB
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
"""
        return summary
# Module-level shared manager instance, created at import time.
session_pool_manager = DynamicSessionPoolManager()
def get_optimal_session_config(lama_migan_ratio: Optional[Tuple[int, int]] = None,
                               worker_session_ratio: Optional[float] = None) -> SessionPoolConfig:
    """Return the optimal session-pool configuration from the shared manager.

    Thin wrapper that forwards both arguments to
    ``session_pool_manager.calculate_optimal_config``.

    Args:
        lama_migan_ratio: Optional ``(lama, migan)`` ratio override.
        worker_session_ratio: Optional workers-per-session override.

    Returns:
        The ``SessionPoolConfig`` computed by the shared manager.
    """
    return session_pool_manager.calculate_optimal_config(
        custom_lama_migan_ratio=lama_migan_ratio,
        custom_worker_session_ratio=worker_session_ratio,
    )
def print_config_summary():
    """Print the shared manager's configuration summary to standard output."""
    print(session_pool_manager.generate_config_summary())
if __name__ == "__main__":
    # Manual test run: compute a few configurations and print the summary of
    # the last one (each call replaces the manager's current configuration).
    logging.basicConfig(level=logging.INFO)

    print("🔍 GPU 자동 감지 및 최적 구성 계산...")

    # Default configuration.
    config1 = get_optimal_session_config()

    # 4:6 ratio.
    config2 = get_optimal_session_config(lama_migan_ratio=(4, 6), worker_session_ratio=3.0)

    # 3:7 ratio.
    config3 = get_optimal_session_config(lama_migan_ratio=(3, 7), worker_session_ratio=3.0)

    print_config_summary()