418 lines
16 KiB
Python
418 lines
16 KiB
Python
"""
|
|
동적 메모리 기반 세션풀 관리 시스템
|
|
- GPU 메모리 자동 감지
|
|
- LAMA:MIGAN 비율 기반 최적 세션풀 구성
|
|
- 실시간 메모리 모니터링 및 동적 조정
|
|
"""
|
|
|
|
import logging
|
|
import time
|
|
from typing import Dict, Tuple, Optional, Any
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
import torch
|
|
import psutil
|
|
|
|
|
|
class GPUTier(Enum):
    """GPU tier classification used to select a sizing template."""

    # RTX 30 series
    RTX_3050_4GB = "rtx_3050_4gb"
    RTX_3060_12GB = "rtx_3060_12gb"
    RTX_3070_8GB = "rtx_3070_8gb"
    RTX_3080_10GB = "rtx_3080_10gb"
    RTX_3090_24GB = "rtx_3090_24gb"

    # RTX 40 series
    RTX_4060_16GB = "rtx_4060_16gb"
    RTX_4070_12GB = "rtx_4070_12gb"
    RTX_4080_16GB = "rtx_4080_16gb"
    RTX_4090_24GB = "rtx_4090_24gb"

    # Anything that could not be classified
    UNKNOWN = "unknown"
|
|
|
|
|
|
@dataclass
class SessionPoolConfig:
    """Session pool sizing: per-model session counts plus derived ratios."""

    # Session counts per model type
    migan_sessions: int
    lama_sessions: int
    ocr_sessions: int
    # Number of workers serving the sessions
    workers: int
    # Estimated VRAM consumption and the head-room left over, both in MB
    total_vram_usage_mb: int
    safety_margin_mb: int
    # Number of simultaneous clients this configuration is expected to serve
    expected_concurrent_clients: int

    @property
    def total_sessions(self) -> int:
        """Return the number of sessions across MIGAN, LAMA and OCR."""
        return sum((self.migan_sessions, self.lama_sessions, self.ocr_sessions))

    @property
    def lama_migan_ratio(self) -> str:
        """Return the LAMA:MIGAN split as ``"L:M"`` on a scale that sums to 10.

        ``"0:0"`` is returned when there are no LAMA or MIGAN sessions.
        """
        inpaint_sessions = self.lama_sessions + self.migan_sessions
        if inpaint_sessions == 0:
            return "0:0"
        lama_share = round(self.lama_sessions / inpaint_sessions * 10)
        return f"{lama_share}:{10 - lama_share}"

    @property
    def worker_session_ratio(self) -> float:
        """Return workers per session, or ``0.0`` when there are no sessions."""
        session_count = self.total_sessions
        return self.workers / session_count if session_count != 0 else 0.0
|
|
|
|
|
|
@dataclass
class MemoryInfo:
    """Snapshot of the detected GPU: memory figures, identity and capabilities."""

    # Memory figures, all in MB
    total_vram_mb: int
    available_vram_mb: int
    used_vram_mb: int

    # Device identity
    gpu_name: str
    gpu_tier: GPUTier

    # CUDA compute capability formatted as "major.minor"
    cuda_capability: str

    # Capability flags
    supports_fp16: bool
    supports_tensor_cores: bool
|
|
|
|
|
|
class DynamicSessionPoolManager:
    """Size and monitor the inference session pools from detected GPU memory.

    The manager inspects the current CUDA device, classifies it into a
    ``GPUTier`` and derives how many MIGAN, LAMA and OCR sessions (and how
    many workers) fit into the VRAM budget configured for that tier.
    """

    # Divisor used for every byte -> MB conversion in this class.
    _BYTES_PER_MB = 1024 * 1024

    # Estimated VRAM footprint per session type, in MB.
    MEMORY_USAGE = {
        'migan_session': 1200,   # MIGAN footprint after warm-up
        'lama_session': 500,     # LAMA session
        'ocr_session': 400,      # OCR session
        'system_overhead': 500,  # fixed system overhead
    }

    # Sizing template per GPU tier.
    # NOTE(review): RTX_4090_24GB, RTX_4070_12GB, RTX_4060_16GB and
    # RTX_3060_12GB can be returned by _classify_gpu_tier but have no entry
    # here, so they fall back to the conservative template below.
    GPU_TEMPLATES = {
        GPUTier.RTX_4080_16GB: {
            'target_vram_usage_pct': 70,
            'lama_migan_ratio': (4, 6),  # 4:6
            'worker_session_ratio': 3.0,
            'max_concurrent_clients': 50,
        },
        GPUTier.RTX_3090_24GB: {
            'target_vram_usage_pct': 70,
            'lama_migan_ratio': (3, 7),  # 3:7
            'worker_session_ratio': 3.0,
            'max_concurrent_clients': 70,
        },
        GPUTier.RTX_3080_10GB: {
            'target_vram_usage_pct': 65,
            'lama_migan_ratio': (5, 5),  # 5:5
            'worker_session_ratio': 3.5,
            'max_concurrent_clients': 30,
        },
        GPUTier.RTX_3070_8GB: {
            'target_vram_usage_pct': 60,
            'lama_migan_ratio': (6, 4),  # 6:4 (LAMA-heavy)
            'worker_session_ratio': 4.0,
            'max_concurrent_clients': 20,
        },
        GPUTier.RTX_3050_4GB: {
            'target_vram_usage_pct': 55,
            'lama_migan_ratio': (8, 2),  # 8:2 (LAMA-heavy)
            'worker_session_ratio': 5.0,
            'max_concurrent_clients': 10,
        },
    }

    # Conservative template used when the tier has no entry in GPU_TEMPLATES.
    _CONSERVATIVE_TEMPLATE = {
        'target_vram_usage_pct': 50,
        'lama_migan_ratio': (7, 3),
        'worker_session_ratio': 4.0,
        'max_concurrent_clients': 15,
    }

    # Model-number substrings checked, in this order, against the lowercase
    # device name. The first match wins.
    _NAME_MARKERS = (
        # RTX 40 series
        ("4090", GPUTier.RTX_4090_24GB),
        ("4080", GPUTier.RTX_4080_16GB),
        ("4070", GPUTier.RTX_4070_12GB),
        ("4060", GPUTier.RTX_4060_16GB),
        # RTX 30 series
        ("3090", GPUTier.RTX_3090_24GB),
        ("3080", GPUTier.RTX_3080_10GB),
        ("3070", GPUTier.RTX_3070_8GB),
        ("3060", GPUTier.RTX_3060_12GB),
        ("3050", GPUTier.RTX_3050_4GB),
    )

    # Fallback when the name is not recognised: minimum total VRAM (MB) for
    # each tier, checked from largest to smallest.
    _VRAM_FLOORS_MB = (
        (22000, GPUTier.RTX_3090_24GB),  # 22GB+
        (15000, GPUTier.RTX_4080_16GB),  # 15GB+
        (11000, GPUTier.RTX_3060_12GB),  # 11GB+
        (9000, GPUTier.RTX_3080_10GB),   # 9GB+
        (7000, GPUTier.RTX_3070_8GB),    # 7GB+
        (3500, GPUTier.RTX_3050_4GB),    # 3.5GB+
    )

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Create a manager with no cached GPU information or configuration.

        Args:
            logger: Logger to use; defaults to this module's logger.
        """
        self.logger = logger or logging.getLogger(__name__)
        self._memory_info: Optional[MemoryInfo] = None
        self._current_config: Optional[SessionPoolConfig] = None
        # Timestamp (time.time()) of the last monitoring pass. 0 means the
        # first call to monitor_and_adjust always runs a check.
        self._last_memory_check = 0
        self._memory_check_interval = 30  # seconds between memory checks

    def detect_gpu_info(self) -> MemoryInfo:
        """Inspect the current CUDA device and cache the result.

        Returns:
            A ``MemoryInfo`` snapshot, also stored on the instance.

        Raises:
            RuntimeError: If CUDA is not available.
        """
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA GPU를 사용할 수 없습니다")

        device = torch.cuda.current_device()
        props = torch.cuda.get_device_properties(device)

        total_vram_mb = props.total_memory // self._BYTES_PER_MB

        # Drop cached blocks before sampling the allocator counters.
        torch.cuda.empty_cache()
        allocated = torch.cuda.memory_allocated(device)
        reserved = torch.cuda.memory_reserved(device)
        # NOTE(review): these counters describe the PyTorch allocator of this
        # process only; memory held by other processes or other runtimes is
        # not included. torch.cuda.mem_get_info may be a better source.
        used_vram_mb = max(allocated, reserved) // self._BYTES_PER_MB
        available_vram_mb = total_vram_mb - used_vram_mb

        gpu_name = props.name
        gpu_tier = self._classify_gpu_tier(gpu_name, total_vram_mb)

        capability = f"{props.major}.{props.minor}"
        # Both flags are keyed on compute capability major >= 7 (Volta or newer).
        supports_fp16 = props.major >= 7
        supports_tensor_cores = props.major >= 7

        memory_info = MemoryInfo(
            total_vram_mb=total_vram_mb,
            available_vram_mb=available_vram_mb,
            used_vram_mb=used_vram_mb,
            gpu_name=gpu_name,
            gpu_tier=gpu_tier,
            cuda_capability=capability,
            supports_fp16=supports_fp16,
            supports_tensor_cores=supports_tensor_cores,
        )

        self.logger.info(f"🔍 GPU 감지: {gpu_name} ({gpu_tier.value})")
        self.logger.info(f"💾 VRAM: {total_vram_mb}MB 총용량, {available_vram_mb}MB 사용가능")
        self.logger.info(f"⚡ CUDA {capability}, FP16: {supports_fp16}, TensorCores: {supports_tensor_cores}")

        self._memory_info = memory_info
        return memory_info

    def _classify_gpu_tier(self, gpu_name: str, total_vram_mb: int) -> GPUTier:
        """Classify a GPU by model number in its name, then by VRAM size.

        Args:
            gpu_name: Device name as reported by the driver.
            total_vram_mb: Total device memory in MB.

        Returns:
            The matching tier, or ``GPUTier.UNKNOWN`` when the name is not
            recognised and the device has less than 3500 MB.
        """
        lowered_name = gpu_name.lower()

        for marker, tier in self._NAME_MARKERS:
            if marker in lowered_name:
                return tier

        # Name not recognised: estimate the tier from the memory size.
        for floor_mb, tier in self._VRAM_FLOORS_MB:
            if total_vram_mb >= floor_mb:
                return tier

        return GPUTier.UNKNOWN

    def calculate_optimal_config(self,
                                 custom_lama_migan_ratio: Optional[Tuple[int, int]] = None,
                                 custom_worker_session_ratio: Optional[float] = None) -> SessionPoolConfig:
        """Compute the session pool configuration for the detected GPU.

        GPU detection is run first if no snapshot has been cached yet.

        Args:
            custom_lama_migan_ratio: Optional ``(lama, migan)`` weights that
                override the tier template.
            custom_worker_session_ratio: Optional workers-per-session factor
                that overrides the tier template.

        Returns:
            The computed ``SessionPoolConfig``, also cached on the instance.

        Raises:
            RuntimeError: If CUDA is unavailable, or if the VRAM budget does
                not even cover the fixed system overhead.
            ValueError: If a ratio weight is negative or the weights sum to
                zero.
        """
        if not self._memory_info:
            self.detect_gpu_info()

        memory_info = self._memory_info
        usage = self.MEMORY_USAGE

        tier_template = self.GPU_TEMPLATES.get(memory_info.gpu_tier)
        if tier_template is not None:
            template = dict(tier_template)
        else:
            # No template for this tier: use the conservative settings.
            template = dict(self._CONSERVATIVE_TEMPLATE)
            self.logger.warning(f"⚠️ 알 수 없는 GPU: {memory_info.gpu_name}, 보수적 설정 적용")

        # Caller-supplied overrides.
        if custom_lama_migan_ratio:
            template['lama_migan_ratio'] = custom_lama_migan_ratio
        if custom_worker_session_ratio:
            template['worker_session_ratio'] = custom_worker_session_ratio

        # VRAM budget: a percentage of what is currently available; the rest
        # is reported as safety margin.
        target_usage_mb = int(memory_info.available_vram_mb * template['target_vram_usage_pct'] / 100)
        safety_margin_mb = memory_info.available_vram_mb - target_usage_mb

        available_for_sessions = target_usage_mb - usage['system_overhead']
        if available_for_sessions <= 0:
            raise RuntimeError(f"❌ 세션풀 생성에 필요한 메모리가 부족합니다: {available_for_sessions}MB")

        lama_ratio, migan_ratio = template['lama_migan_ratio']
        total_ratio = lama_ratio + migan_ratio
        # Reject weights that would divide by zero or yield negative budgets.
        if lama_ratio < 0 or migan_ratio < 0 or total_ratio <= 0:
            raise ValueError(f"❌ 잘못된 LAMA:MIGAN 비율입니다: {lama_ratio}:{migan_ratio}")

        # Split the session budget by ratio; each model gets at least one session.
        migan_memory_budget = int(available_for_sessions * migan_ratio / total_ratio)
        lama_memory_budget = int(available_for_sessions * lama_ratio / total_ratio)
        migan_sessions = max(1, migan_memory_budget // usage['migan_session'])
        lama_sessions = max(1, lama_memory_budget // usage['lama_session'])

        # OCR sessions use whatever is left, with a minimum of two.
        used_memory = (migan_sessions * usage['migan_session'] +
                       lama_sessions * usage['lama_session'])
        remaining_memory = available_for_sessions - used_memory
        ocr_sessions = max(2, remaining_memory // usage['ocr_session'])

        total_sessions = migan_sessions + lama_sessions + ocr_sessions
        workers = int(total_sessions * template['worker_session_ratio'])

        total_usage = (used_memory +
                       ocr_sessions * usage['ocr_session'] +
                       usage['system_overhead'])

        # The per-model minimums above can push usage past the budget on
        # small GPUs; make that visible instead of overcommitting silently.
        if total_usage > target_usage_mb:
            self.logger.warning(
                f"⚠️ 최소 세션 수 보장으로 목표 VRAM 사용량 초과: {total_usage}MB > {target_usage_mb}MB"
            )

        config = SessionPoolConfig(
            migan_sessions=migan_sessions,
            lama_sessions=lama_sessions,
            ocr_sessions=ocr_sessions,
            workers=workers,
            total_vram_usage_mb=total_usage,
            safety_margin_mb=safety_margin_mb,
            expected_concurrent_clients=template['max_concurrent_clients'],
        )
        self._current_config = config

        self.logger.info("🎯 최적 세션풀 구성 계산 완료:")
        self.logger.info(f" 💾 VRAM 사용: {total_usage}MB / {memory_info.available_vram_mb}MB ({template['target_vram_usage_pct']}%)")
        self.logger.info(f" 🔄 세션풀: MIGAN {migan_sessions}개, LAMA {lama_sessions}개, OCR {ocr_sessions}개")
        self.logger.info(f" ⚡ 비율: LAMA:MIGAN = {config.lama_migan_ratio}, 워커:세션 = {config.worker_session_ratio:.1f}:1")
        self.logger.info(f" 👥 예상 동시 클라이언트: {config.expected_concurrent_clients}명")

        return config

    def get_current_memory_status(self) -> Dict[str, Any]:
        """Return the current memory counters of the active CUDA device.

        Returns:
            ``{"error": ...}`` when CUDA is unavailable; otherwise a dict with
            ``allocated_mb``, ``reserved_mb``, ``total_mb``, ``free_mb`` and
            ``utilization_pct``. Usage is the larger of the allocated and
            reserved counters.
        """
        if not torch.cuda.is_available():
            return {"error": "CUDA 사용 불가"}

        device = torch.cuda.current_device()
        allocated = torch.cuda.memory_allocated(device)
        reserved = torch.cuda.memory_reserved(device)
        total = torch.cuda.get_device_properties(device).total_memory
        in_use = max(allocated, reserved)

        return {
            "allocated_mb": allocated // self._BYTES_PER_MB,
            "reserved_mb": reserved // self._BYTES_PER_MB,
            "total_mb": total // self._BYTES_PER_MB,
            "free_mb": (total - in_use) // self._BYTES_PER_MB,
            "utilization_pct": round(in_use / total * 100, 1),
        }

    def monitor_and_adjust(self) -> bool:
        """Check VRAM utilisation, at most once per check interval.

        Returns:
            ``True`` when utilisation exceeded 90% (a warning is logged) or
            95% (an error is logged and the CUDA cache is emptied);
            ``False`` when the check was skipped, CUDA is unavailable, or
            utilisation is at or below 90%.
        """
        current_time = time.time()

        # Rate-limit: skip until the interval has elapsed.
        if current_time - self._last_memory_check < self._memory_check_interval:
            return False
        self._last_memory_check = current_time

        memory_status = self.get_current_memory_status()
        if "error" in memory_status:
            return False

        utilization = memory_status["utilization_pct"]

        # The stricter threshold must be tested first; otherwise the 90%
        # branch returns before the emergency cleanup can ever run.
        if utilization > 95:
            self.logger.error(f"🚨 긴급: VRAM 사용률 {memory_status['utilization_pct']}% - 캐시 정리 실행")
            torch.cuda.empty_cache()
            return True

        if utilization > 90:
            self.logger.warning(f"⚠️ 높은 VRAM 사용률: {memory_status['utilization_pct']}%")
            self.logger.warning(f" 사용량: {memory_status['allocated_mb']}MB / {memory_status['total_mb']}MB")
            return True

        return False

    def generate_config_summary(self) -> str:
        """Render the cached GPU information and configuration as text.

        Returns:
            A multi-line summary, or a short "no configuration" message when
            either the GPU snapshot or the configuration is missing.
        """
        if not self._current_config or not self._memory_info:
            return "❌ 구성 정보 없음"

        config = self._current_config
        memory = self._memory_info

        # Per-model totals come from MEMORY_USAGE so the summary stays in
        # step with the values used for sizing.
        migan_mb = config.migan_sessions * self.MEMORY_USAGE['migan_session']
        lama_mb = config.lama_sessions * self.MEMORY_USAGE['lama_session']
        ocr_mb = config.ocr_sessions * self.MEMORY_USAGE['ocr_session']
        usage_pct = config.total_vram_usage_mb / memory.total_vram_mb * 100

        lines = [
            "🚀 동적 세션풀 구성 요약",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
            f"🔧 GPU: {memory.gpu_name} ({memory.gpu_tier.value})",
            f"💾 VRAM: {memory.total_vram_mb}MB (사용가능: {memory.available_vram_mb}MB)",
            f"⚡ 기능: FP16={memory.supports_fp16}, TensorCores={memory.supports_tensor_cores}",
            "",
            "🎯 세션풀 구성:",
            f"- MIGAN: {config.migan_sessions}개 ({migan_mb}MB)",
            f"- LAMA: {config.lama_sessions}개 ({lama_mb}MB)",
            f"- OCR: {config.ocr_sessions}개 ({ocr_mb}MB)",
            f"- 워커: {config.workers}개",
            "",
            "📊 성능 지표:",
            f"- LAMA:MIGAN 비율: {config.lama_migan_ratio}",
            f"- 워커:세션 비율: {config.worker_session_ratio:.1f}:1",
            f"- 예상 동시 클라이언트: {config.expected_concurrent_clients}명",
            f"- VRAM 사용률: {config.total_vram_usage_mb}MB ({usage_pct:.1f}%)",
            f"- 안전 여유분: {config.safety_margin_mb}MB",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
        ]
        # Leading and trailing newline match the original triple-quoted block.
        return "\n" + "\n".join(lines) + "\n"
|
|
|
|
|
|
# Module-level shared instance used by the helper functions below.
session_pool_manager = DynamicSessionPoolManager()
|
|
|
|
|
|
def get_optimal_session_config(lama_migan_ratio: Optional[Tuple[int, int]] = None,
                               worker_session_ratio: Optional[float] = None) -> SessionPoolConfig:
    """Compute the session pool configuration via the shared manager.

    Args:
        lama_migan_ratio: Optional ``(lama, migan)`` weights forwarded as
            ``custom_lama_migan_ratio``.
        worker_session_ratio: Optional workers-per-session factor forwarded
            as ``custom_worker_session_ratio``.

    Returns:
        The configuration returned by the manager's
        ``calculate_optimal_config``.
    """
    manager = session_pool_manager
    overrides = {
        "custom_lama_migan_ratio": lama_migan_ratio,
        "custom_worker_session_ratio": worker_session_ratio,
    }
    return manager.calculate_optimal_config(**overrides)
|
|
|
|
|
|
def print_config_summary():
    """Print the shared manager's configuration summary to stdout."""
    summary_text = session_pool_manager.generate_config_summary()
    print(summary_text)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# 테스트 실행
|
|
logging.basicConfig(level=logging.INFO)
|
|
|
|
print("🔍 GPU 자동 감지 및 최적 구성 계산...")
|
|
|
|
# 기본 구성
|
|
config1 = get_optimal_session_config()
|
|
|
|
# 4:6 비율 적용
|
|
config2 = get_optimal_session_config(lama_migan_ratio=(4, 6), worker_session_ratio=3.0)
|
|
|
|
# 3:7 비율 적용
|
|
config3 = get_optimal_session_config(lama_migan_ratio=(3, 7), worker_session_ratio=3.0)
|
|
|
|
print_config_summary()
|
|
|