상태 JSON 파일의 워커 수를 10으로 증가시키고, 각 모델에 대한 최소 및 최대 세션 수를 설정하였습니다. API 엔드포인트를 '/api/v1/health'로 변경하고, 세션 풀 관리 로직을 개선하여 VRAM 사용량에 따라 동적으로 세션을 생성 및 제거하도록 하였습니다. 대시보드에서 세션 정보 표시 방식을 개선하였으며, 로그 파일에 서버 프로세스 ID 변경 사항을 반영하였습니다.

2025-08-30 00:07:13 +09:00 · 2025-08-30 00:07:13 +09:00 · c18667c17d
parent a8e0c166a3
commit c18667c17d
16 changed files with 2345 additions and 444 deletions
--- a/app/api/endpoints.py
+++ b/app/api/endpoints.py
@@ -103,7 +103,7 @@ def create_response(
        })


-@router.get("/health", response_model=HealthResponse)
+@router.get("/api/v1/health", response_model=HealthResponse, name="health_check")
 async def health_check():
    """서버 상태 확인"""
    start_time = getattr(settings, 'start_time', time.time())
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -3,16 +3,38 @@ Configuration settings for the inpainting server
 """
 import os
 import platform
-from typing import Dict, Any, Optional
+from typing import Dict, Any, Optional, ClassVar
 from pydantic_settings import BaseSettings


 class Settings(BaseSettings):
    # System detection
-    IS_JETSON: bool = platform.machine() == "aarch64" and "tegra" in platform.uname().release.lower()
-    IS_X86: bool = platform.machine() in ["x86_64", "amd64"]
+    IS_JETSON: ClassVar[bool] = "aarch64" in platform.machine().lower() and "tegra" in platform.release().lower()
+    PROJECT_ROOT: ClassVar[str] = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+    # --- 동적 세션 풀 및 메모리 관리 설정 ---
+    # 각 모델별 최소/최대 세션 수. 서버 시작 시 min 만큼 생성되고, VRAM 여유 시 max까지 확장됨.
+    SIMPLE_LAMA_MIN_SESSIONS: int = 2 if IS_JETSON else 2
+    SIMPLE_LAMA_MAX_SESSIONS: int = 4 if IS_JETSON else 4
    
-    # Server settings
+    MIGAN_MIN_SESSIONS: int = 2 if IS_JETSON else 1  # x86에서는 Migan을 기본으로 로드하지 않음
+    MIGAN_MAX_SESSIONS: int = 4 if IS_JETSON else 4
+    
+    REMBG_MIN_SESSIONS: int = 2 if IS_JETSON else 1
+    REMBG_MAX_SESSIONS: int = 4 if IS_JETSON else 3
+
+    # 세션 추가 생성 VRAM 임계값 (%). 남은 VRAM이 이 값보다 커야 새 세션 생성.
+    # 예: 12GB VRAM에 0.3이면, 3.6GB 이상 여유가 있어야 함.
+    SESSION_VRAM_THRESHOLD: float = 0.3 if IS_JETSON else 0.3 # Jetson에서는 통합 메모리이므로 좀 더 여유롭게 설정
+
+    # 유휴 세션 자동 제거 시간 (초). 0이면 비활성화.
+    SESSION_IDLE_TIMEOUT: int = 1800  # 30분
+
+    # --- 서버 환경 설정 (클래스 내부로 이동) ---
+    APP_VERSION: str = "3.0.0-dynamic-pool"
+    APP_NAME: str = "Inpaint & RemoveBG Server"
+    API_PREFIX: str = "/api/v1"
+
    HOST: str = "0.0.0.0"
    PORT: int = 8008
    WORKERS: int = 1 # 개발 모드에서는 1로 고정, 프로덕션에서는 gunicorn으로 관리
@@ -40,13 +62,13 @@ class Settings(BaseSettings):
    REMBG_SESSIONS: int = 3 if IS_JETSON else 2       # 메모리 공유 방식의 이점 활용
    
    # Worker settings (Jetson은 통합 메모리로 더 효율적)
-    MAX_WORKERS: int = 8 if IS_JETSON else 16   # Jetson: 메모리 오버헤드 적음
-    MIN_WORKERS: int = 4 if IS_JETSON else 8   # 통합 메모리 활용
+    MAX_WORKERS: int = 10 if IS_JETSON else 8
+    MIN_WORKERS: int = 10 if IS_JETSON else 4
    WORKER_TIMEOUT: int = 120  # 2 minutes
    
    # 메모리 관리 (Jetson은 32GB 통합 메모리로 여유로움)
-    VRAM_THRESHOLD_HIGH: float = 0.85 if IS_JETSON else 0.75  # Jetson: 32GB 통합 메모리
-    VRAM_THRESHOLD_LOW: float = 0.4 if IS_JETSON else 0.3     # 데스크톱: VRAM 제한
+    VRAM_THRESHOLD_HIGH: float = 0.85 if IS_JETSON else 0.80  # 워커 추가 생성 중단
+    VRAM_THRESHOLD_LOW: float = 0.4 if IS_JETSON else 0.40   # 워커 제거 시작
    VRAM_CHECK_INTERVAL: int = 30 if IS_JETSON else 15        # Jetson은 덜 자주 체크
    
    # Model paths
--- a/app/core/session_pool.py
+++ b/app/core/session_pool.py
@@ -1,21 +1,25 @@
 """
 세션 풀 관리 시스템
 각 모델(simple-lama, migan, rembg)의 세션을 효율적으로 관리합니다.
+VRAM 사용량을 고려하여 세션을 동적으로 생성하고, 유휴 세션을 자동으로 제거합니다.
 """
 import asyncio
 import logging
 import time
-from typing import Dict, List, Optional, Any
+from typing import Dict, List, Optional, Any, Tuple
 from enum import Enum
 from dataclasses import dataclass
 from contextlib import asynccontextmanager

+from ..core.config import settings
+from ..utils.gpu_monitor import gpu_monitor
+
 logger = logging.getLogger(__name__)


 class ModelType(Enum):
    SIMPLE_LAMA = "simple_lama"
-    MIGAN = "migan" 
+    MIGAN = "migan"
    REMBG = "rembg"


@@ -27,86 +31,123 @@ class ModelSession:
    created_at: float
    last_used: float
    in_use: bool = False
-    
+
    def mark_used(self):
        self.last_used = time.time()
-        
-    def is_expired(self, timeout: int = 3600) -> bool:
-        return time.time() - self.last_used > timeout
+
+    def is_idle(self, timeout: int) -> bool:
+        if timeout <= 0:
+            return False
+        return not self.in_use and (time.time() - self.last_used > timeout)


 class SessionPool:
-    def __init__(self, 
-                 simple_lama_count: int = 2,
-                 migan_count: int = 2, 
-                 rembg_count: int = 1):
-        self.pools: Dict[ModelType, List[ModelSession]] = {
-            ModelType.SIMPLE_LAMA: [],
-            ModelType.MIGAN: [],
-            ModelType.REMBG: []
-        }
-        self.pool_sizes = {
-            ModelType.SIMPLE_LAMA: simple_lama_count,
-            ModelType.MIGAN: migan_count,
-            ModelType.REMBG: rembg_count
-        }
-        self.locks: Dict[ModelType, asyncio.Lock] = {
-            model_type: asyncio.Lock() for model_type in ModelType
+    def __init__(self, model_configs: Dict[ModelType, Tuple[int, int]]):
+        self.pools: Dict[ModelType, List[ModelSession]] = {mt: [] for mt in ModelType}
+        self.model_configs = model_configs
+        
+        self.conditions: Dict[ModelType, asyncio.Condition] = {
+            mt: asyncio.Condition() for mt in ModelType
        }
        self._initialized = False
+        self._reaper_task: Optional[asyncio.Task] = None
        
    async def initialize(self):
-        """모든 모델 세션을 초기화합니다."""
        if self._initialized:
            return
            
-        logger.info("Initializing session pools...")
+        logger.info("Initializing dynamic session pools...")
        
-        for model_type, count in self.pool_sizes.items():
-            await self._initialize_model_pool(model_type, count)
-            
+        for model_type, (min_sessions, _) in self.model_configs.items():
+            if min_sessions > 0:
+                logger.info(f"Pre-loading {min_sessions} sessions for {model_type.value}")
+                # Use asyncio.gather for concurrent session creation
+                tasks = [
+                    self._create_session(model_type, f"{model_type.value}_{i}")
+                    for i in range(min_sessions)
+                ]
+                try:
+                    results = await asyncio.gather(*tasks, return_exceptions=True)
+                    for i, res in enumerate(results):
+                        if isinstance(res, ModelSession):
+                            self.pools[model_type].append(res)
+                        else:
+                            logger.error(f"Failed to create initial session {model_type.value}_{i}: {res}")
+                except Exception as e:
+                    logger.error(f"Error during concurrent session creation for {model_type.value}: {e}", exc_info=True)
+
        self._initialized = True
+        
+        # 유휴 세션 제거 작업 시작
+        if settings.SESSION_IDLE_TIMEOUT > 0:
+            self._reaper_task = asyncio.create_task(self._reap_idle_sessions())
+            
        logger.info("Session pools initialized successfully")
-        
-    async def _initialize_model_pool(self, model_type: ModelType, count: int):
-        """특정 모델의 세션 풀을 초기화합니다."""
-        logger.info(f"Initializing {count} sessions for {model_type.value}")
-        
-        for i in range(count):
-            try:
-                session = await self._create_session(model_type, f"{model_type.value}_{i}")
-                self.pools[model_type].append(session)
-                logger.info(f"Created session {session.session_id}")
-            except Exception as e:
-                logger.error(f"Failed to create session for {model_type.value}: {e}")
+
+    async def _reap_idle_sessions(self):
+        """유휴 세션을 주기적으로 제거하는 백그라운드 작업."""
+        logger.info(f"Idle session reaper started. Timeout: {settings.SESSION_IDLE_TIMEOUT}s, Check Interval: 60s")
+        while True:
+            await asyncio.sleep(60) # 1분마다 체크
+            
+            for model_type, pool in self.pools.items():
+                min_sessions, _ = self.model_configs[model_type]
                
+                async with self.conditions[model_type]:
+                    if len(pool) <= min_sessions:
+                        continue
+                    
+                    # 사용 중이 아니고, 타임아웃이 지난 세션을 찾음
+                    idle_sessions = [s for s in pool if s.is_idle(settings.SESSION_IDLE_TIMEOUT)]
+                    
+                    # 제거할 세션 수 결정 (min_sessions 이하로는 줄이지 않음)
+                    num_to_reap = min(len(idle_sessions), len(pool) - min_sessions)
+                    
+                    if num_to_reap > 0:
+                        sessions_to_reap = idle_sessions[:num_to_reap]
+                        logger.info(f"Reaping {len(sessions_to_reap)} idle session(s) for {model_type.value}. Current pool size: {len(pool)}, Min size: {min_sessions}")
+                        for session in sessions_to_reap:
+                            pool.remove(session)
+                            # 명시적으로 모델 객체를 삭제하여 GC가 메모리를 회수하도록 유도
+                            del session.model
+                            del session
+                        
+                        # Notify any waiting tasks that pool state has changed
+                        self.conditions[model_type].notify_all()
+
+
    async def _create_session(self, model_type: ModelType, session_id: str) -> ModelSession:
        """새로운 모델 세션을 생성합니다."""
-        model = await self._load_model(model_type)
-        return ModelSession(
-            session_id=session_id,
-            model_type=model_type,
-            model=model,
-            created_at=time.time(),
-            last_used=time.time()
-        )
-        
+        logger.info(f"Creating new session {session_id} for {model_type.value}...")
+        try:
+            model = await self._load_model(model_type)
+            session = ModelSession(
+                session_id=session_id,
+                model_type=model_type,
+                model=model,
+                created_at=time.time(),
+                last_used=time.time()
+            )
+            logger.info(f"Successfully created session {session.session_id}")
+            return session
+        except Exception as e:
+            logger.error(f"Failed to create session {session_id}: {e}", exc_info=True)
+            raise
+
    async def _load_model(self, model_type: ModelType) -> Any:
        """모델을 로드합니다."""
-        # 실제 구현에서는 각 모델을 로드하는 로직이 들어갑니다
        if model_type == ModelType.SIMPLE_LAMA:
            return await self._load_simple_lama_model()
        elif model_type == ModelType.MIGAN:
            return await self._load_migan_model()
        elif model_type == ModelType.REMBG:
-            return await self._load_rembg_model()
+            return await asyncio.wait_for(self._load_rembg_model(), timeout=180) # 모델 다운로드 시간 고려
        else:
            raise ValueError(f"Unknown model type: {model_type}")
-            
+
    async def _load_simple_lama_model(self):
        """Simple LAMA 모델을 로드합니다."""
        from ..models.simple_lama import SimpleLamaInpainter
-        from ..core.config import settings
        
        try:
            model = SimpleLamaInpainter(
@@ -115,19 +156,17 @@ class SessionPool:
                fp16=settings.USE_FP16
            )
            await model.load_model()
-            logger.info("Simple LAMA 모델 세션 로드 완료")
+            logger.debug("Simple LAMA model instance created and loaded.")
            return model
        except Exception as e:
-            logger.error(f"Simple LAMA 모델 로드 실패: {e}")
+            logger.error(f"Failed to load Simple LAMA model: {e}", exc_info=True)
            raise
        
    async def _load_migan_model(self):
        """MIGAN 모델을 로드합니다."""
        from ..models.migan import MiganInpainter
-        from ..core.config import settings
        
        try:
-            # MIGAN 모델 생성 - ONNX Runtime이 자동으로 CUDA 감지
            model = MiganInpainter(
                model_path=getattr(settings, 'MIGAN_ONNX_PATH', settings.MIGAN_MODEL_PATH),
                device="cuda" if settings.USE_CUDA else "cpu",
@@ -135,31 +174,28 @@ class SessionPool:
                use_cuda=settings.USE_CUDA
            )
            await model.load_model()
-            logger.info("MIGAN 모델 세션 로드 완료")
+            logger.debug("MIGAN model instance created and loaded.")
            return model
        except Exception as e:
-            logger.error(f"MIGAN 모델 로드 실패: {e}")
+            logger.error(f"Failed to load MIGAN model: {e}", exc_info=True)
            raise
        
    async def _load_rembg_model(self):
        """REMBG 모델을 로드합니다."""
        from ..models.rembg_model import RembgProcessor
-        from ..core.config import settings
        
        try:
-            # RemBG 모델 생성 - 자동으로 CUDA 감지
            model = RembgProcessor(
                model_name=getattr(settings, 'REMBG_MODEL_NAME', 'birefnet-general-lite'),
                device="cuda" if settings.USE_CUDA else "cpu",
                fp16=settings.USE_FP16,
                local_rembg_model_path=getattr(settings, 'LOCAL_REMBG_MODEL_PATH', None)
            )
-            # 프리로드 강제: 실패 시 서버 기동 실패로 처리 (원인 파악을 위함)
            await model.load_model()
-            logger.info("REMBG 모델 세션 로드 완료")
+            logger.debug("RembgProcessor instance created and model pre-loaded.")
            return model
        except Exception as e:
-            logger.error(f"REMBG 모델 로드 실패: {e}")
+            logger.error(f"Failed to load REMBG model: {e}", exc_info=True)
            raise
        
    @asynccontextmanager
@@ -169,122 +205,89 @@ class SessionPool:
        try:
            yield session
        finally:
-            await self._release_session(session)
+            if session:
+                await self._release_session(session)
            
    async def _acquire_session(self, model_type: ModelType) -> ModelSession:
-        """사용 가능한 세션을 획득합니다."""
-        async with self.locks[model_type]:
-            # 사용 가능한 세션 찾기
-            for session in self.pools[model_type]:
-                if not session.in_use:
-                    session.in_use = True
-                    session.mark_used()
-                    logger.debug(f"Acquired session {session.session_id}")
-                    return session
-                    
-            # 사용 가능한 세션이 없으면 대기
-            logger.warning(f"No available sessions for {model_type.value}, waiting...")
-            
-        # 세션이 사용 가능해질 때까지 대기
+        """사용 가능한 세션을 획득하거나, VRAM 여유 시 새로 생성합니다."""
+        condition = self.conditions[model_type]
+        _, max_sessions = self.model_configs[model_type]
+
        while True:
-            await asyncio.sleep(0.1)
-            async with self.locks[model_type]:
+            async with condition:
+                # 1. 사용 가능한 세션 찾기
                for session in self.pools[model_type]:
                    if not session.in_use:
                        session.in_use = True
                        session.mark_used()
-                        logger.debug(f"Acquired session {session.session_id} after waiting")
+                        logger.debug(f"Acquired existing session {session.session_id}")
                        return session
+                
+                # 2. 세션을 새로 생성할 수 있는지 확인
+                if len(self.pools[model_type]) < max_sessions:
+                    # 2-1. VRAM 여유 공간 확인
+                    gpu_mem_info = gpu_monitor.get_gpu_memory_info()
+                    free_vram_ratio = gpu_mem_info.get("free_ratio", 0)
+                    
+                    if free_vram_ratio > settings.SESSION_VRAM_THRESHOLD:
+                        current_pool_size = len(self.pools[model_type])
+                        session_id = f"{model_type.value}_{current_pool_size}"
                        
+                        logger.info(f"Attempting to create new session for {model_type.value}. Current size: {current_pool_size}, Max size: {max_sessions}")
+                        
+                        # Condition lock을 잠시 해제하고 세션 생성 (I/O 작업)
+                        try:
+                            new_session = await self._create_session(model_type, session_id)
+                            # 다시 lock을 잡고 풀에 추가
+                            new_session.in_use = True
+                            new_session.mark_used()
+                            self.pools[model_type].append(new_session)
+                            logger.info(f"Acquired new session {new_session.session_id} as VRAM is sufficient ({free_vram_ratio:.2f} > {settings.SESSION_VRAM_THRESHOLD:.2f})")
+                            return new_session
+                        except Exception:
+                            # 세션 생성 실패 시 루프를 계속하여 다시 시도하거나 대기
+                            logger.error(f"New session creation failed for {model_type.value}. Will wait for an existing session.")
+                            pass
+                    else:
+                        logger.warning(f"Cannot create new session for {model_type.value}. VRAM threshold not met. (Free: {free_vram_ratio:.2f} <= Threshold: {settings.SESSION_VRAM_THRESHOLD:.2f})")
+
+                # 3. 대기
+                logger.debug(f"No available sessions or VRAM for {model_type.value}, waiting...")
+                await condition.wait()
+
    async def _release_session(self, session: ModelSession):
-        """세션을 반환합니다."""
-        async with self.locks[session.model_type]:
+        """세션을 반환하고 대기 중인 다른 요청에 알립니다."""
+        condition = self.conditions[session.model_type]
+        async with condition:
            session.in_use = False
            logger.debug(f"Released session {session.session_id}")
+            condition.notify() # 대기 중인 하나의 코루틴을 깨움
            
-    async def get_pool_status(self) -> Dict[str, Any]:
-        """풀 상태를 반환합니다."""
-        status = {}
-        for model_type in ModelType:
-            pool = self.pools[model_type]
-            total = len(pool)
-            in_use = sum(1 for session in pool if session.in_use)
-            available = total - in_use
-            
-            status[model_type.value] = {
-                "total": total,
-                "in_use": in_use,
-                "available": available,
-                "sessions": [
-                    {
-                        "id": session.session_id,
-                        "in_use": session.in_use,
-                        "last_used": session.last_used,
-                        "created_at": session.created_at
-                    }
-                    for session in pool
-                ]
-            }
-        return status
-        
-    async def cleanup_expired_sessions(self, timeout: int = 3600):
-        """만료된 세션을 정리합니다."""
-        for model_type, pool in self.pools.items():
-            async with self.locks[model_type]:
-                expired_sessions = [s for s in pool if s.is_expired(timeout) and not s.in_use]
-                for session in expired_sessions:
-                    pool.remove(session)
-                    logger.info(f"Removed expired session {session.session_id}")
-                    
-    async def scale_pool(self, model_type: ModelType, new_size: int):
-        """풀 크기를 조정합니다."""
-        async with self.locks[model_type]:
-            current_size = len(self.pools[model_type])
-            
-            if new_size > current_size:
-                # 세션 추가
-                for i in range(current_size, new_size):
-                    session_id = f"{model_type.value}_{i}"
-                    session = await self._create_session(model_type, session_id)
-                    self.pools[model_type].append(session)
-                    logger.info(f"Added session {session_id}")
-                    
-            elif new_size < current_size:
-                # 세션 제거 (사용 중이지 않은 것만)
-                sessions_to_remove = []
-                for session in self.pools[model_type]:
-                    if not session.in_use and len(sessions_to_remove) < (current_size - new_size):
-                        sessions_to_remove.append(session)
-                        
-                for session in sessions_to_remove:
-                    self.pools[model_type].remove(session)
-                    logger.info(f"Removed session {session.session_id}")
-                    
-            self.pool_sizes[model_type] = new_size
-
    def get_status(self) -> dict:
        """세션 풀의 현재 상태를 반환합니다."""
        status_by_model = {}
        
        for model_type in ModelType:
            pool = self.pools[model_type]
+            min_s, max_s = self.model_configs[model_type]
            total = len(pool)
            in_use = sum(1 for session in pool if session.in_use)
            available = total - in_use
            
            status_by_model[model_type.value] = {
+                "min": min_s,
+                "max": max_s,
                "total": total,
                "in_use": in_use,
                "available": available
            }
-            
        return status_by_model

-
 # 전역 세션 풀 인스턴스 (설정값으로 초기화)
-from ..core.config import settings
-session_pool = SessionPool(
-    simple_lama_count=settings.SIMPLE_LAMA_SESSIONS,
-    migan_count=settings.MIGAN_SESSIONS,
-    rembg_count=settings.REMBG_SESSIONS
-)
+model_configs = {
+    ModelType.SIMPLE_LAMA: (settings.SIMPLE_LAMA_MIN_SESSIONS, settings.SIMPLE_LAMA_MAX_SESSIONS),
+    ModelType.MIGAN: (settings.MIGAN_MIN_SESSIONS, settings.MIGAN_MAX_SESSIONS),
+    ModelType.REMBG: (settings.REMBG_MIN_SESSIONS, settings.REMBG_MAX_SESSIONS),
+}
+
+session_pool = SessionPool(model_configs=model_configs)
--- a/app/monitoring/dashboard.py
+++ b/app/monitoring/dashboard.py
@@ -1181,31 +1181,28 @@ HTML_TEMPLATE = """
            
            // 세션 풀 상세 정보 업데이트
            if (data.sessions) {
-                // Simple LAMA 세션
-                const lamaSession = data.sessions['simple-lama'] || data.sessions['simple_lama'] || {};
-                const lamaTotalSessions = lamaSession.total || 0;
-                const lamaInUse = lamaSession.in_use || 0;
-                const lamaAvailable = lamaSession.available || (lamaTotalSessions - lamaInUse);
-                document.getElementById('session-lama').textContent = `${lamaInUse}/${lamaTotalSessions} 사용중`;
+                // Helper function to update session info
+                const updateSessionInfo = (modelKey, elementId) => {
+                    const session = data.sessions[modelKey] || {};
+                    const inUse = session.in_use || 0;
+                    const total = session.total || 0;
+                    const min = session.min || 0;
+                    const max = session.max || 0;
+                    document.getElementById(elementId).textContent = `${inUse} / ${total} (min:${min}, max:${max})`;
+                };
                
-                // MIGAN 세션
-                const miganSession = data.sessions['migan'] || {};
-                const miganTotalSessions = miganSession.total || 0;
-                const miganInUse = miganSession.in_use || 0;
-                const miganAvailable = miganSession.available || (miganTotalSessions - miganInUse);
-                document.getElementById('session-migan').textContent = `${miganInUse}/${miganTotalSessions} 사용중`;
-                
-                // RemBG 세션
-                const rembgSession = data.sessions['rembg'] || {};
-                const rembgTotalSessions = rembgSession.total || 0;
-                const rembgInUse = rembgSession.in_use || 0;
-                const rembgAvailable = rembgSession.available || (rembgTotalSessions - rembgInUse);
-                document.getElementById('session-rembg').textContent = `${rembgInUse}/${rembgTotalSessions} 사용중`;
+                updateSessionInfo('simple_lama', 'session-lama');
+                updateSessionInfo('migan', 'session-migan');
+                updateSessionInfo('rembg', 'session-rembg');
                
                // 총 세션 수
-                const totalSessions = lamaTotalSessions + miganTotalSessions + rembgTotalSessions;
-                const totalInUse = lamaInUse + miganInUse + rembgInUse;
-                document.getElementById('session-total').textContent = `${totalInUse}/${totalSessions} 사용중`;
+                let totalInUse = 0;
+                let totalSessions = 0;
+                Object.values(data.sessions).forEach(s => {
+                    totalInUse += s.in_use || 0;
+                    totalSessions += s.total || 0;
+                });
+                document.getElementById('session-total').textContent = `${totalInUse} / ${totalSessions} 사용중`;
                
                console.log("세션 풀 상태:", data.sessions);
            } else {
--- a/logs/main.log
+++ b/logs/main.log
--- a/logs/main_server.log
+++ b/logs/main_server.log
@ -1,207 +1,71 @@
-INFO:     Started server process [41241]
-2025-08-29 21:56:12,908 - uvicorn.error - INFO - Started server process [41241]
+INFO:     Started server process [56165]
+2025-08-29 23:46:34,096 - uvicorn.error - INFO - Started server process [56165]
 INFO:     Waiting for application startup.
-2025-08-29 21:56:12,909 - uvicorn.error - INFO - Waiting for application startup.
-2025-08-29 21:56:12,910 - main - INFO - 🚀 인페인팅 서버 시작 중...
-2025-08-29 21:56:12,911 - main - INFO - ✅ 공유 객체를 app.state에 저장 완료
-2025-08-29 21:56:12,911 - main - INFO - 🔄 상태 저장 백그라운드 작업 생성 중...
-2025-08-29 21:56:12,911 - main - INFO - ✅ 상태 저장 백그라운드 작업 생성 완료
-2025-08-29 21:56:12,912 - main - INFO - 🚀 세션 풀 초기화 (CUDA 자동 감지)
-2025-08-29 21:56:12,912 - app.core.session_pool - INFO - Initializing session pools...
-2025-08-29 21:56:12,912 - app.core.session_pool - INFO - Initializing 4 sessions for simple_lama
-2025-08-29 21:56:16,159 - app.models.simple_lama - INFO - Loading Simple LAMA model...
-2025-08-29 21:56:20,613 - app.models.simple_lama - INFO - 실제 SimpleLama 모델 로딩 완료
-2025-08-29 21:56:20,614 - app.models.simple_lama - INFO - Simple LAMA model loaded successfully
-2025-08-29 21:56:20,615 - app.core.session_pool - INFO - Simple LAMA 모델 세션 로드 완료
-2025-08-29 21:56:20,616 - app.core.session_pool - INFO - Created session simple_lama_0
-2025-08-29 21:56:20,617 - app.models.simple_lama - INFO - Loading Simple LAMA model...
-2025-08-29 21:56:22,400 - app.models.simple_lama - INFO - 실제 SimpleLama 모델 로딩 완료
-2025-08-29 21:56:22,401 - app.models.simple_lama - INFO - Simple LAMA model loaded successfully
-2025-08-29 21:56:22,401 - app.core.session_pool - INFO - Simple LAMA 모델 세션 로드 완료
-2025-08-29 21:56:22,402 - app.core.session_pool - INFO - Created session simple_lama_1
-2025-08-29 21:56:22,402 - app.models.simple_lama - INFO - Loading Simple LAMA model...
-2025-08-29 21:56:24,088 - app.models.simple_lama - INFO - 실제 SimpleLama 모델 로딩 완료
-2025-08-29 21:56:24,088 - app.models.simple_lama - INFO - Simple LAMA model loaded successfully
-2025-08-29 21:56:24,089 - app.core.session_pool - INFO - Simple LAMA 모델 세션 로드 완료
-2025-08-29 21:56:24,089 - app.core.session_pool - INFO - Created session simple_lama_2
-2025-08-29 21:56:24,089 - app.models.simple_lama - INFO - Loading Simple LAMA model...
-2025-08-29 21:56:25,756 - app.models.simple_lama - INFO - 실제 SimpleLama 모델 로딩 완료
-2025-08-29 21:56:25,756 - app.models.simple_lama - INFO - Simple LAMA model loaded successfully
-2025-08-29 21:56:25,757 - app.core.session_pool - INFO - Simple LAMA 모델 세션 로드 완료
-2025-08-29 21:56:25,757 - app.core.session_pool - INFO - Created session simple_lama_3
-2025-08-29 21:56:25,757 - app.core.session_pool - INFO - Initializing 4 sessions for migan
-2025-08-29 21:56:25,822 - app.models.migan - INFO - Loading MIGAN ONNX model...
-2025-08-29 21:56:25,822 - app.models.migan - INFO - MIGAN ONNX 런타임 세션 생성 시도...
-2025-08-29 21:56:25,823 - app.models.migan - INFO - MIGAN ONNX providers 설정: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-[0;93m2025-08-29 21:56:27.238315022 [W:onnxruntime:, transformer_memcpy.cc:74 ApplyImpl] 17 Memcpy nodes are added to the graph main_graph for CUDAExecutionProvider. It might have negative impact on performance (including unable to run CUDA graph). Set session_options.log_severity_level=1 to see the detail logs before this message.[m
-2025-08-29 21:56:28,580 - app.models.migan - INFO - MIGAN ONNX 세션 생성 완료. Providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-2025-08-29 21:56:28,581 - app.models.migan - INFO - MIGAN ONNX model loaded successfully
-2025-08-29 21:56:28,581 - app.core.session_pool - INFO - MIGAN 모델 세션 로드 완료
-2025-08-29 21:56:28,582 - app.core.session_pool - INFO - Created session migan_0
-2025-08-29 21:56:28,582 - app.models.migan - INFO - Loading MIGAN ONNX model...
-2025-08-29 21:56:28,583 - app.models.migan - INFO - MIGAN ONNX 런타임 세션 생성 시도...
-2025-08-29 21:56:28,583 - app.models.migan - INFO - MIGAN ONNX providers 설정: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-[0;93m2025-08-29 21:56:29.684943504 [W:onnxruntime:, transformer_memcpy.cc:74 ApplyImpl] 17 Memcpy nodes are added to the graph main_graph for CUDAExecutionProvider. It might have negative impact on performance (including unable to run CUDA graph). Set session_options.log_severity_level=1 to see the detail logs before this message.[m
-2025-08-29 21:56:29,852 - app.models.migan - INFO - MIGAN ONNX 세션 생성 완료. Providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-2025-08-29 21:56:29,853 - app.models.migan - INFO - MIGAN ONNX model loaded successfully
-2025-08-29 21:56:29,853 - app.core.session_pool - INFO - MIGAN 모델 세션 로드 완료
-2025-08-29 21:56:29,854 - app.core.session_pool - INFO - Created session migan_1
-2025-08-29 21:56:29,854 - app.models.migan - INFO - Loading MIGAN ONNX model...
-2025-08-29 21:56:29,854 - app.models.migan - INFO - MIGAN ONNX 런타임 세션 생성 시도...
-2025-08-29 21:56:29,855 - app.models.migan - INFO - MIGAN ONNX providers 설정: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-[0;93m2025-08-29 21:56:30.907277353 [W:onnxruntime:, transformer_memcpy.cc:74 ApplyImpl] 17 Memcpy nodes are added to the graph main_graph for CUDAExecutionProvider. It might have negative impact on performance (including unable to run CUDA graph). Set session_options.log_severity_level=1 to see the detail logs before this message.[m
-2025-08-29 21:56:31,072 - app.models.migan - INFO - MIGAN ONNX 세션 생성 완료. Providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-2025-08-29 21:56:31,072 - app.models.migan - INFO - MIGAN ONNX model loaded successfully
-2025-08-29 21:56:31,073 - app.core.session_pool - INFO - MIGAN 모델 세션 로드 완료
-2025-08-29 21:56:31,073 - app.core.session_pool - INFO - Created session migan_2
-2025-08-29 21:56:31,074 - app.models.migan - INFO - Loading MIGAN ONNX model...
-2025-08-29 21:56:31,074 - app.models.migan - INFO - MIGAN ONNX 런타임 세션 생성 시도...
-2025-08-29 21:56:31,074 - app.models.migan - INFO - MIGAN ONNX providers 설정: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-[0;93m2025-08-29 21:56:32.141164804 [W:onnxruntime:, transformer_memcpy.cc:74 ApplyImpl] 17 Memcpy nodes are added to the graph main_graph for CUDAExecutionProvider. It might have negative impact on performance (including unable to run CUDA graph). Set session_options.log_severity_level=1 to see the detail logs before this message.[m
-2025-08-29 21:56:32,306 - app.models.migan - INFO - MIGAN ONNX 세션 생성 완료. Providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-2025-08-29 21:56:32,307 - app.models.migan - INFO - MIGAN ONNX model loaded successfully
-2025-08-29 21:56:32,308 - app.core.session_pool - INFO - MIGAN 모델 세션 로드 완료
-2025-08-29 21:56:32,308 - app.core.session_pool - INFO - Created session migan_3
-2025-08-29 21:56:32,309 - app.core.session_pool - INFO - Initializing 3 sessions for rembg
-2025-08-29 21:56:32,310 - app.models.rembg_model - INFO - Loading REMBG model (birefnet-general-lite)...
-2025-08-29 21:56:34,333 - app.models.rembg_model - INFO - rembg 모듈 임포트 성공 (세션 생성은 지연 로딩)
-2025-08-29 21:56:34,334 - app.models.rembg_model - INFO - 🔧 rembg 새 세션 생성 필요: birefnet-general-lite_cuda_True
-2025-08-29 21:56:34,334 - app.models.rembg_model - WARNING - rembg.sessions import 실패, 기본 방식 사용
-2025-08-29 21:56:34,335 - app.models.rembg_model - INFO - rembg 세션 생성 providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-2025-08-29 21:56:47,648 - app.models.rembg_model - INFO - ✅ rembg 'birefnet-general-lite' GPU 가속로 동작 (providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'])
-2025-08-29 21:56:47,649 - app.models.rembg_model - INFO - REMBG model (birefnet-general-lite) loaded successfully
-2025-08-29 21:56:47,650 - app.core.session_pool - INFO - REMBG 모델 세션 로드 완료
-2025-08-29 21:56:47,650 - app.core.session_pool - INFO - Created session rembg_0
-2025-08-29 21:56:47,651 - app.models.rembg_model - INFO - Loading REMBG model (birefnet-general-lite)...
-2025-08-29 21:56:47,651 - app.models.rembg_model - INFO - rembg 모듈 임포트 성공 (세션 생성은 지연 로딩)
-2025-08-29 21:56:47,651 - app.models.rembg_model - INFO - 🔧 rembg 새 세션 생성 필요: birefnet-general-lite_cuda_True
-2025-08-29 21:56:47,652 - app.models.rembg_model - WARNING - rembg.sessions import 실패, 기본 방식 사용
-2025-08-29 21:56:47,652 - app.models.rembg_model - INFO - rembg 세션 생성 providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-2025-08-29 21:57:00,508 - app.models.rembg_model - INFO - ✅ rembg 'birefnet-general-lite' GPU 가속로 동작 (providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'])
-2025-08-29 21:57:00,509 - app.models.rembg_model - INFO - REMBG model (birefnet-general-lite) loaded successfully
-2025-08-29 21:57:00,509 - app.core.session_pool - INFO - REMBG 모델 세션 로드 완료
-2025-08-29 21:57:00,510 - app.core.session_pool - INFO - Created session rembg_1
-2025-08-29 21:57:00,510 - app.models.rembg_model - INFO - Loading REMBG model (birefnet-general-lite)...
-2025-08-29 21:57:00,511 - app.models.rembg_model - INFO - rembg 모듈 임포트 성공 (세션 생성은 지연 로딩)
-2025-08-29 21:57:00,511 - app.models.rembg_model - INFO - 🔧 rembg 새 세션 생성 필요: birefnet-general-lite_cuda_True
-2025-08-29 21:57:00,512 - app.models.rembg_model - WARNING - rembg.sessions import 실패, 기본 방식 사용
-2025-08-29 21:57:00,512 - app.models.rembg_model - INFO - rembg 세션 생성 providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
-2025-08-29 21:57:13,223 - app.models.rembg_model - INFO - ✅ rembg 'birefnet-general-lite' GPU 가속로 동작 (providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'])
-2025-08-29 21:57:13,223 - app.models.rembg_model - INFO - REMBG model (birefnet-general-lite) loaded successfully
-2025-08-29 21:57:13,224 - app.core.session_pool - INFO - REMBG 모델 세션 로드 완료
-2025-08-29 21:57:13,224 - app.core.session_pool - INFO - Created session rembg_2
-2025-08-29 21:57:13,225 - app.core.session_pool - INFO - Session pools initialized successfully
-2025-08-29 21:57:13,225 - main - INFO - ✅ 세션 풀 초기화 완료
-2025-08-29 21:57:13,225 - app.core.worker_manager - INFO - Starting worker manager...
-2025-08-29 21:57:13,226 - app.core.worker_manager - INFO - Worker manager started with 4 workers
-2025-08-29 21:57:13,226 - main - INFO - ✅ 워커 매니저 시작 완료
-2025-08-29 21:57:13,227 - main - INFO - 🎉 인페인팅 서버 시작 완료!
-2025-08-29 21:57:13,227 - main - INFO - 🔄 상태 저장 백그라운드 작업 시작됨
+2025-08-29 23:46:34,097 - uvicorn.error - INFO - Waiting for application startup.
+2025-08-29 23:46:34,098 - main - INFO - 🚀 인페인팅 서버 시작 중...
+2025-08-29 23:46:34,098 - main - INFO - ✅ 공유 객체를 app.state에 저장 완료
+2025-08-29 23:46:34,098 - main - INFO - 🔄 상태 저장 백그라운드 작업 생성 중...
+2025-08-29 23:46:34,099 - main - INFO - ✅ 상태 저장 백그라운드 작업 생성 완료
+2025-08-29 23:46:34,099 - main - INFO - 🚀 세션 풀 초기화 (CUDA 자동 감지)
+2025-08-29 23:46:34,100 - app.core.session_pool - INFO - Initializing dynamic session pools...
+2025-08-29 23:46:34,100 - app.core.session_pool - INFO - Pre-loading 2 sessions for simple_lama
+2025-08-29 23:46:34,100 - main - INFO - 🔄 상태 저장 백그라운드 작업 시작됨
+2025-08-29 23:46:34,104 - app.core.session_pool - INFO - Creating new session simple_lama_0 for simple_lama...
+2025-08-29 23:46:37,259 - app.models.simple_lama - INFO - Loading Simple LAMA model...
+2025-08-29 23:46:41,562 - app.models.simple_lama - INFO - 실제 SimpleLama 모델 로딩 완료
+2025-08-29 23:46:41,563 - app.models.simple_lama - INFO - Simple LAMA model loaded successfully
+2025-08-29 23:46:41,564 - app.core.session_pool - INFO - Successfully created session simple_lama_0
+2025-08-29 23:46:41,565 - app.core.session_pool - INFO - Creating new session simple_lama_1 for simple_lama...
+2025-08-29 23:46:41,566 - app.models.simple_lama - INFO - Loading Simple LAMA model...
+2025-08-29 23:46:43,429 - app.models.simple_lama - INFO - 실제 SimpleLama 모델 로딩 완료
+2025-08-29 23:46:43,429 - app.models.simple_lama - INFO - Simple LAMA model loaded successfully
+2025-08-29 23:46:43,429 - app.core.session_pool - INFO - Successfully created session simple_lama_1
+2025-08-29 23:46:43,430 - app.core.session_pool - INFO - Pre-loading 2 sessions for migan
+2025-08-29 23:46:43,432 - app.core.session_pool - INFO - Creating new session migan_0 for migan...
+2025-08-29 23:46:43,491 - app.models.migan - INFO - Loading MIGAN ONNX model...
+2025-08-29 23:46:43,491 - app.models.migan - INFO - MIGAN ONNX 런타임 세션 생성 시도...
+2025-08-29 23:46:43,492 - app.models.migan - INFO - MIGAN ONNX providers 설정: ['CUDAExecutionProvider', 'CPUExecutionProvider']
+[0;93m2025-08-29 23:46:44.934790365 [W:onnxruntime:, transformer_memcpy.cc:74 ApplyImpl] 17 Memcpy nodes are added to the graph main_graph for CUDAExecutionProvider. It might have negative impact on performance (including unable to run CUDA graph). Set session_options.log_severity_level=1 to see the detail logs before this message.[m
+2025-08-29 23:46:46,412 - app.models.migan - INFO - MIGAN ONNX 세션 생성 완료. Providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
+2025-08-29 23:46:46,413 - app.models.migan - INFO - MIGAN ONNX model loaded successfully
+2025-08-29 23:46:46,413 - app.core.session_pool - INFO - Successfully created session migan_0
+2025-08-29 23:46:46,414 - app.core.session_pool - INFO - Creating new session migan_1 for migan...
+2025-08-29 23:46:46,414 - app.models.migan - INFO - Loading MIGAN ONNX model...
+2025-08-29 23:46:46,415 - app.models.migan - INFO - MIGAN ONNX 런타임 세션 생성 시도...
+2025-08-29 23:46:46,415 - app.models.migan - INFO - MIGAN ONNX providers 설정: ['CUDAExecutionProvider', 'CPUExecutionProvider']
+[0;93m2025-08-29 23:46:47.468053292 [W:onnxruntime:, transformer_memcpy.cc:74 ApplyImpl] 17 Memcpy nodes are added to the graph main_graph for CUDAExecutionProvider. It might have negative impact on performance (including unable to run CUDA graph). Set session_options.log_severity_level=1 to see the detail logs before this message.[m
+2025-08-29 23:46:47,630 - app.models.migan - INFO - MIGAN ONNX 세션 생성 완료. Providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
+2025-08-29 23:46:47,631 - app.models.migan - INFO - MIGAN ONNX model loaded successfully
+2025-08-29 23:46:47,632 - app.core.session_pool - INFO - Successfully created session migan_1
+2025-08-29 23:46:47,632 - app.core.session_pool - INFO - Pre-loading 2 sessions for rembg
+2025-08-29 23:46:47,634 - app.core.session_pool - INFO - Creating new session rembg_0 for rembg...
+2025-08-29 23:46:47,634 - app.core.session_pool - INFO - Creating new session rembg_1 for rembg...
+2025-08-29 23:46:47,638 - app.models.rembg_model - INFO - Loading REMBG model (birefnet-general-lite)...
+2025-08-29 23:46:49,612 - app.models.rembg_model - INFO - rembg 모듈 임포트 성공 (세션 생성은 지연 로딩)
+2025-08-29 23:46:49,612 - app.models.rembg_model - INFO - 🔧 rembg 새 세션 생성 필요: birefnet-general-lite_cuda_True
+2025-08-29 23:46:49,613 - app.models.rembg_model - WARNING - rembg.sessions import 실패, 기본 방식 사용
+2025-08-29 23:46:49,613 - app.models.rembg_model - INFO - rembg 세션 생성 providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
+2025-08-29 23:47:02,613 - app.models.rembg_model - INFO - ✅ rembg 'birefnet-general-lite' GPU 가속로 동작 (providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'])
+2025-08-29 23:47:02,614 - app.models.rembg_model - INFO - REMBG model (birefnet-general-lite) loaded successfully
+2025-08-29 23:47:02,615 - app.models.rembg_model - INFO - Loading REMBG model (birefnet-general-lite)...
+2025-08-29 23:47:02,615 - app.models.rembg_model - INFO - rembg 모듈 임포트 성공 (세션 생성은 지연 로딩)
+2025-08-29 23:47:02,616 - app.models.rembg_model - INFO - 🔧 rembg 새 세션 생성 필요: birefnet-general-lite_cuda_True
+2025-08-29 23:47:02,617 - app.models.rembg_model - WARNING - rembg.sessions import 실패, 기본 방식 사용
+2025-08-29 23:47:02,617 - app.models.rembg_model - INFO - rembg 세션 생성 providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
+2025-08-29 23:47:15,228 - app.models.rembg_model - INFO - ✅ rembg 'birefnet-general-lite' GPU 가속로 동작 (providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'])
+2025-08-29 23:47:15,229 - app.models.rembg_model - INFO - REMBG model (birefnet-general-lite) loaded successfully
+2025-08-29 23:47:15,230 - app.core.session_pool - INFO - Successfully created session rembg_0
+2025-08-29 23:47:15,230 - app.core.session_pool - INFO - Successfully created session rembg_1
+2025-08-29 23:47:15,232 - app.core.session_pool - INFO - Session pools initialized successfully
+2025-08-29 23:47:15,232 - main - INFO - ✅ 세션 풀 초기화 완료
+2025-08-29 23:47:15,233 - app.core.worker_manager - INFO - Starting worker manager...
+2025-08-29 23:47:15,234 - app.core.worker_manager - INFO - Worker manager started with 10 workers
+2025-08-29 23:47:15,234 - main - INFO - ✅ 워커 매니저 시작 완료
+2025-08-29 23:47:15,234 - main - INFO - 🎉 인페인팅 서버 시작 완료!
+2025-08-29 23:47:15,235 - app.core.session_pool - INFO - Idle session reaper started. Timeout: 1800s, Check Interval: 60s
 INFO:     Application startup complete.
-2025-08-29 21:57:16,343 - uvicorn.error - INFO - Application startup complete.
+2025-08-29 23:47:15,235 - uvicorn.error - INFO - Application startup complete.
 INFO:     Uvicorn running on http://0.0.0.0:8008 (Press CTRL+C to quit)
-2025-08-29 21:57:16,353 - uvicorn.error - INFO - Uvicorn running on http://0.0.0.0:8008 (Press CTRL+C to quit)
-INFO:     127.0.0.1:54262 - "GET /api/v1/health HTTP/1.1" 404 Not Found
-INFO:     127.0.0.1:54274 - "GET /api/v1/health HTTP/1.1" 404 Not Found
-INFO:     192.168.0.119:61112 - "GET /health HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61231 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 21:59:51,134 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 21:59:57,813 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 6.722s
-INFO:     192.168.0.119:61232 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61299 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:00:02,737 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:00:18,011 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 15.314s
-INFO:     192.168.0.119:61300 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61373 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:00:22,330 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:00:23,972 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.694s
-INFO:     192.168.0.119:61374 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61417 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:00:28,723 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:00:30,117 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.445s
-INFO:     192.168.0.119:61418 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61459 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:00:35,326 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:00:36,758 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.471s
-INFO:     192.168.0.119:61462 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61491 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:00:41,071 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:00:42,651 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.618s
-INFO:     192.168.0.119:61492 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61507 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:00:47,196 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:00:48,619 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.450s
-INFO:     192.168.0.119:61508 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61521 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:00:52,857 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:00:54,558 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.736s
-INFO:     192.168.0.119:61522 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61553 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:00:58,326 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:00:59,657 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.371s
-INFO:     192.168.0.119:61554 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61568 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:01:04,284 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:01:05,630 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.378s
-INFO:     192.168.0.119:61569 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61766 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:01:57,794 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:02:00,934 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 3.179s
-INFO:     192.168.0.119:61767 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61819 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:02:06,382 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:02:07,866 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.514s
-INFO:     192.168.0.119:61820 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61864 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:02:15,220 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:02:16,792 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.615s
-INFO:     192.168.0.119:61876 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:61923 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:02:22,461 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:02:23,790 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.377s
-INFO:     192.168.0.119:61924 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:62164 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:03:01,528 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:03:04,020 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 2.532s
-INFO:     192.168.0.119:62165 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:62225 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:03:13,341 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:03:15,572 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 2.260s
-INFO:     192.168.0.119:62226 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:62291 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:03:21,500 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:03:29,405 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 7.941s
-INFO:     192.168.0.119:62292 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:62546 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:04:02,266 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:04:12,367 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 10.139s
-INFO:     192.168.0.119:62551 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:62613 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:04:16,160 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:04:18,086 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.959s
-INFO:     192.168.0.119:62614 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:62653 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:04:23,356 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:04:25,222 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.918s
-INFO:     192.168.0.119:62654 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:62769 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:04:49,806 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:04:51,747 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 1.978s
-INFO:     192.168.0.119:62770 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     192.168.0.119:62996 - "GET /api/v1/model HTTP/1.1" 200 OK
-2025-08-29 22:05:37,916 - app.models.simple_lama - INFO - 실제 SimpleLama 모델로 인페인팅 수행
-2025-08-29 22:05:41,193 - app.core.worker_manager - INFO - 'simple-lama' inpainting processed in 3.324s
-INFO:     Shutting down
-2025-08-29 22:05:41,242 - uvicorn.error - INFO - Shutting down
-INFO:     192.168.0.119:62998 - "POST /api/v1/inpaint HTTP/1.1" 200 OK
-INFO:     Waiting for application shutdown.
-2025-08-29 22:05:41,344 - uvicorn.error - INFO - Waiting for application shutdown.
-2025-08-29 22:05:41,345 - main - INFO - 🛑 인페인팅 서버 종료 중...
-2025-08-29 22:05:41,345 - app.core.worker_manager - INFO - Stopping worker manager...
-2025-08-29 22:05:41,346 - app.core.worker_manager - INFO - Worker manager stopped
-2025-08-29 22:05:41,346 - main - INFO - ✅ 워커 매니저 중지 완료
-2025-08-29 22:05:41,347 - main - INFO - 👋 인페인팅 서버 종료 완료
-INFO:     Application shutdown complete.
-2025-08-29 22:05:41,347 - uvicorn.error - INFO - Application shutdown complete.
-INFO:     Finished server process [41241]
-2025-08-29 22:05:41,348 - uvicorn.error - INFO - Finished server process [41241]
+2025-08-29 23:47:15,236 - uvicorn.error - INFO - Uvicorn running on http://0.0.0.0:8008 (Press CTRL+C to quit)
+INFO:     127.0.0.1:57044 - "GET /api/v1/health HTTP/1.1" 200 OK
+INFO:     127.0.0.1:57060 - "GET /api/v1/health HTTP/1.1" 200 OK
--- a/logs/main_server.pid
+++ b/logs/main_server.pid
@ -1 +1 @@
-41241
+56165
--- a/logs/monitoring.log
+++ b/logs/monitoring.log
@ -1,10 +1,12 @@
-INFO:     Started server process [41671]
+INFO:     Started server process [56411]
 INFO:     Waiting for application startup.
 Fan control not available
 INFO:     Application startup complete.
 INFO:     Uvicorn running on http://0.0.0.0:8888 (Press CTRL+C to quit)
-INFO:     127.0.0.1:50826 - "GET /api/simple HTTP/1.1" 200 OK
-INFO:     Shutting down
-INFO:     Waiting for application shutdown.
-INFO:     Application shutdown complete.
-INFO:     Finished server process [41671]
+INFO:     127.0.0.1:43630 - "GET /api/simple HTTP/1.1" 200 OK
+Task exception was never retrieved
+future: <Task finished name='Task-4' coro=<health_check_and_restart() done, defined at /home/ckh08045/work/inpaintServer/app/monitoring/dashboard.py:2084> exception=AttributeError("module 'asyncio' has no attribute 'to_thread'")>
+Traceback (most recent call last):
+  File "/home/ckh08045/work/inpaintServer/app/monitoring/dashboard.py", line 2094, in health_check_and_restart
+    response = await asyncio.to_thread(requests.get, health_url, timeout=10)
+AttributeError: module 'asyncio' has no attribute 'to_thread'
--- a/logs/monitoring.pid
+++ b/logs/monitoring.pid
@ -1 +1 @@
-41671
+56411
--- a/scripts/check_onnx.py
+++ b/scripts/check_onnx.py
@ -0,0 +1,45 @@
+import onnxruntime as ort
+import sys
+
+def _batch_dim_is_dynamic(shape):
+    """Return True when the leading (batch) dimension of *shape* is dynamic.
+
+    The session metadata is expected to report a dynamic dimension either
+    as a string (symbolic name) or as None, and a fixed one as an int.
+    An empty shape (rank-0 / scalar tensor) has no batch dimension at all,
+    so it is reported as not dynamic instead of raising IndexError.
+    """
+    return bool(shape) and (isinstance(shape[0], str) or shape[0] is None)
+
+
+def check_onnx_batch_support(model_path):
+    """Load an ONNX model and report whether it supports batched inference.
+
+    Prints the name and shape of every model input and output, and flags
+    whether the first dimension of each is dynamic (batch-capable) or fixed.
+
+    Args:
+        model_path: Path of the ONNX file to inspect.
+
+    Returns:
+        None. All results, including failures, are printed to stdout.
+    """
+    try:
+        session = ort.InferenceSession(model_path)
+        print(f"✅ 모델 로드 성공: {model_path}")
+
+        print("\n[입력 정보]")
+        for i, input_meta in enumerate(session.get_inputs()):
+            shape = input_meta.shape
+            print(f"  - 입력 #{i}")
+            print(f"    - 이름: {input_meta.name}")
+            print(f"    - 형태: {shape}")
+            if _batch_dim_is_dynamic(shape):
+                print(f"    - ✅ 배치 처리 지원 (동적 배치 크기 '{shape[0]}')")
+            elif shape:
+                print(f"    - ⚠️ 배치 처리를 지원하지 않거나, 배치 크기가 '{shape[0]}'으로 고정됨")
+            else:
+                # Scalar input: there is no leading dimension to batch over.
+                print("    - ⚠️ 배치 차원이 없음 (스칼라 텐서)")
+
+        print("\n[출력 정보]")
+        for i, output_meta in enumerate(session.get_outputs()):
+            shape = output_meta.shape
+            print(f"  - 출력 #{i}")
+            print(f"    - 이름: {output_meta.name}")
+            print(f"    - 형태: {shape}")
+            # Outputs are only annotated when the batch dimension is dynamic.
+            if _batch_dim_is_dynamic(shape):
+                print(f"    - ✅ 배치 처리 지원 (동적 배치 크기 '{shape[0]}')")
+
+    except Exception as e:
+        # Deliberately broad: this is a best-effort diagnostic CLI, so any
+        # load/inspection failure is reported and the next model is tried.
+        print("❌ 오류 발생: 모델을 로드하거나 분석하는 중 문제가 발생했습니다.")
+        print(f"   {model_path}: {e}")
+
+if __name__ == '__main__':
+    # Script entry point: inspect every ONNX file given on the command line.
+    if len(sys.argv) < 2:
+        # A missing argument is a usage error: report it on stderr and exit
+        # non-zero so shell scripts and CI can detect the misuse.
+        print("사용법: python check_onnx.py <ONNX_FILE_PATH_1> [ONNX_FILE_PATH_2] ...", file=sys.stderr)
+        sys.exit(1)
+    else:
+        for model_path in sys.argv[1:]:
+            # Separator lines keep the per-model reports visually distinct.
+            print("-" * 50)
+            check_onnx_batch_support(model_path)
+            print("-" * 50)
--- a/scripts/start_server.sh
+++ b/scripts/start_server.sh
@ -167,7 +167,7 @@ check_environment() {
    # 가상환경 확인
    log_info "가상환경 경로 확인: $VENV_PATH"
    if [ ! -f "$VENV_PATH/bin/activate" ]; then
-        log_error "가상환경을 찾을 수 없습니다: $VENV_PATH"
+        log_error "가상환경을 찾을 수 없습니다: $VENV_PATH/bin/activate 경로가 존재하지 않습니다."
        exit 1
    fi
    
--- a/status.json
+++ b/status.json
@ -1,33 +1,75 @@
 {
  "worker_status": {
    "running": true,
-    "total_workers": 4,
+    "total_workers": 10,
    "queue_size": 0,
    "workers_by_status": {
      "idle": [
        {
-          "id": "worker_53efd09a",
+          "id": "worker_fcfdab20",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
          "last_task_at": null
        },
        {
-          "id": "worker_c1ca06d0",
+          "id": "worker_07900527",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
          "last_task_at": null
        },
        {
-          "id": "worker_8f181ed5",
+          "id": "worker_fa21a361",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
          "last_task_at": null
        },
        {
-          "id": "worker_0648365c",
+          "id": "worker_aa0517fd",
+          "status": "idle",
+          "task_count": 0,
+          "error_count": 0,
+          "last_task_at": null
+        },
+        {
+          "id": "worker_f6949bff",
+          "status": "idle",
+          "task_count": 0,
+          "error_count": 0,
+          "last_task_at": null
+        },
+        {
+          "id": "worker_2dc147eb",
+          "status": "idle",
+          "task_count": 0,
+          "error_count": 0,
+          "last_task_at": null
+        },
+        {
+          "id": "worker_20ae03ff",
+          "status": "idle",
+          "task_count": 0,
+          "error_count": 0,
+          "last_task_at": null
+        },
+        {
+          "id": "worker_ed0f495e",
+          "status": "idle",
+          "task_count": 0,
+          "error_count": 0,
+          "last_task_at": null
+        },
+        {
+          "id": "worker_06d35af9",
+          "status": "idle",
+          "task_count": 0,
+          "error_count": 0,
+          "last_task_at": null
+        },
+        {
+          "id": "worker_325beb67",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
@ -42,70 +84,52 @@
  },
  "session_status": {
    "simple_lama": {
-      "total": 4,
+      "min": 2,
+      "max": 4,
+      "total": 2,
      "in_use": 0,
-      "available": 4
+      "available": 2
    },
    "migan": {
-      "total": 4,
+      "min": 2,
+      "max": 4,
+      "total": 2,
      "in_use": 0,
-      "available": 4
+      "available": 2
    },
    "rembg": {
-      "total": 3,
+      "min": 2,
+      "max": 4,
+      "total": 2,
      "in_use": 0,
-      "available": 3
+      "available": 2
    }
  },
  "api_stats": {
-    "total_requests": 47,
-    "successful_requests": 45,
-    "failed_requests": 2,
-    "success_rate": 95.74468085106383,
+    "total_requests": 2,
+    "successful_requests": 2,
+    "failed_requests": 0,
+    "success_rate": 100.0,
    "endpoint_usage": {
-      "GET /api/v1/health": 2,
-      "GET /health": 1,
-      "GET /api/v1/model": 22,
-      "POST /api/v1/inpaint": 22
+      "GET /api/v1/health": 2
    },
    "endpoint_stats": {
      "GET /api/v1/health": {
        "count": 2,
-        "avg_time": 0.002225041389465332,
-        "min_time": 0.0013301372528076172,
-        "max_time": 0.003119945526123047,
-        "current_concurrent": 0
-      },
-      "GET /health": {
-        "count": 1,
-        "avg_time": 0.00873565673828125,
-        "min_time": 0.00873565673828125,
-        "max_time": 0.00873565673828125,
-        "current_concurrent": 0
-      },
-      "GET /api/v1/model": {
-        "count": 22,
-        "avg_time": 0.005265561017123136,
-        "min_time": 0.0028429031372070312,
-        "max_time": 0.009748697280883789,
-        "current_concurrent": 0
-      },
-      "POST /api/v1/inpaint": {
-        "count": 22,
-        "avg_time": 3.44641282341697,
-        "min_time": 1.4387421607971191,
-        "max_time": 15.391778945922852,
+        "avg_time": 0.0017114877700805664,
+        "min_time": 0.0015041828155517578,
+        "max_time": 0.001918792724609375,
        "current_concurrent": 0
      }
    },
-    "average_response_time": 1.615959791426963,
-    "min_response_time": 0.0013301372528076172,
-    "max_response_time": 15.391778945922852,
+    "average_response_time": 0.0017114877700805664,
+    "min_response_time": 0.0015041828155517578,
+    "max_response_time": 0.001918792724609375,
    "current_concurrent": 0,
    "max_concurrent": 1,
-    "requests_per_second": 0.0826958620348505,
-    "uptime": 568.3476638793945,
+    "requests_per_second": 0.001614421372065027,
+    "uptime": 1238.8339467048645,
    "recent_errors": []
  },
-  "timestamp": 1756472741.2439997
+  "timestamp": 1756480032.9159214
 }
--- a/tests/generate_test_images.py
+++ b/tests/generate_test_images.py
@ -0,0 +1,51 @@
+from PIL import Image, ImageDraw
+from pathlib import Path
+
+# Settings: output size in pixels, output file names, and target directory.
+WIDTH, HEIGHT = 800, 1000
+IMAGE_FILENAME = "test_image.png"
+MASK_FILENAME = "test_mask.png"
+SAVE_DIR = Path(__file__).parent  # the directory that contains this script
+
+def generate_images():
+    """Create the gradient test image and its inpainting mask in SAVE_DIR.
+
+    Both files are WIDTH x HEIGHT; the mask is black with a white centre box.
+    """
+    # 1. Test image: red rises and green falls from top to bottom, blue is fixed.
+    print(f"'{IMAGE_FILENAME}' 생성 중 ({WIDTH}x{HEIGHT})...")
+    image = Image.new("RGB", (WIDTH, HEIGHT))
+    draw = ImageDraw.Draw(image)
+
+    for i in range(HEIGHT):
+        r = int(255 * (i / HEIGHT))
+        g = int(255 * (1 - i / HEIGHT))
+        b = 128
+        draw.line([(0, i), (WIDTH, i)], fill=(r, g, b))
+
+    image_path = SAVE_DIR / IMAGE_FILENAME
+    image.save(image_path)
+    print(f"✅ 이미지를 성공적으로 저장했습니다: {image_path}")
+
+    # 2. Mask: a white rectangle covering ~20% of the area, centred on black.
+    print(f"'{MASK_FILENAME}' 생성 중 ({WIDTH}x{HEIGHT})...")
+    mask = Image.new("L", (WIDTH, HEIGHT), 0)  # black background
+    draw = ImageDraw.Draw(mask)
+
+    # Each side is 45% of the image, so the area is 0.45 * 0.45 = 20.25%.
+    mask_width = int(WIDTH * 0.45)  # 800 * 0.45 = 360
+    mask_height = int(HEIGHT * 0.45)  # 1000 * 0.45 = 450
+    x1 = (WIDTH - mask_width) // 2
+    y1 = (HEIGHT - mask_height) // 2
+    # rectangle() includes both corners; -1 keeps the box mask_width x mask_height.
+    x2 = x1 + mask_width - 1
+    y2 = y1 + mask_height - 1
+    draw.rectangle([x1, y1, x2, y2], fill=255)  # white
+
+    mask_path = SAVE_DIR / MASK_FILENAME
+    mask.save(mask_path)
+    print(f"✅ 마스크를 성공적으로 저장했습니다: {mask_path}")
+
+
+if __name__ == "__main__":  # generate the files only when run as a script
+    generate_images()
--- a/tests/load_test_lama.py
+++ b/tests/load_test_lama.py
@ -0,0 +1,75 @@
+import asyncio
+import httpx
+import base64
+from pathlib import Path
+import time
+
+# Test settings
+BASE_URL = "http://localhost:8008/api/v1/inpaint"
+# Paths of the image and mask used by the test (next to this script)
+IMAGE_PATH = Path(__file__).parent / "test_image.png"
+MASK_PATH = Path(__file__).parent / "test_mask.png"
+CONCURRENT_REQUESTS = 8  # requests sent at once (pick roughly min_sessions < N < max_sessions * 2)
+
+def encode_image(image_path: Path) -> str:
+    """Return the file's bytes as base64 text; print a hint and re-raise if it is missing."""
+    try:
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode("utf-8")
+    except FileNotFoundError as missing:
+        print(f"오류: 테스트 이미지를 찾을 수 없습니다 - {missing}")
+        print("먼저 tests/generate_test_images.py 를 실행하여 테스트 이미지를 생성해주세요.")
+        raise
+
+async def send_request(client: httpx.AsyncClient, request_num: int, image_b64: str, mask_b64: str):
+    """POST one inpaint request to BASE_URL and print its outcome and duration.
+
+    Timeouts and transport errors are printed instead of raised.
+    """
+    payload = {
+        "image": image_b64,
+        "mask": mask_b64,
+        "model_name": "simple-lama",
+        "ldm_steps": 20,
+        "ldm_sampler": "plms",
+        "hd_strategy": "Original",
+        "hd_strategy_crop_margin": 128,
+        "hd_strategy_crop_trigger_size": 800,
+        "hd_strategy_resize_limit": 1280
+    }
+    print(f"[{request_num:02d}] 요청 시작...")
+    start_time = time.perf_counter()  # monotonic, unaffected by wall-clock changes
+    try:
+        response = await client.post(BASE_URL, json=payload, timeout=120)
+        duration = time.perf_counter() - start_time
+        if response.status_code == 200:
+            print(f"✅ [{request_num:02d}] 요청 성공 ({duration:.2f}초)")
+        else:
+            print(f"❌ [{request_num:02d}] 요청 실패 - 상태 코드: {response.status_code} ({duration:.2f}초)")
+            print(f"   응답: {response.text[:200]}")
+    except httpx.TimeoutException:  # connect, read, write and pool timeouts alike
+        duration = time.perf_counter() - start_time
+        print(f"❌ [{request_num:02d}] 요청 시간 초과 ({duration:.2f}초)")
+    except httpx.ConnectError as e:
+        print(f"❌ [{request_num:02d}] 연결 실패: {e}")
+    except httpx.RequestError as e:  # any other failure while sending or receiving
+        print(f"❌ [{request_num:02d}] 요청 오류 ({type(e).__name__}): {e}")
+
+async def main():
+    """Encode the test image and mask once, then send all requests concurrently."""
+    print("SimpleLama 부하 테스트 시작...")
+    print(f"동시 요청 수: {CONCURRENT_REQUESTS}")
+
+    # Encode both files up front so every request reuses the same strings.
+    encoded_image = encode_image(IMAGE_PATH)
+    encoded_mask = encode_image(MASK_PATH)
+
+    async with httpx.AsyncClient() as client:
+        # One coroutine per request number, all sharing the single client.
+        await asyncio.gather(*(
+            send_request(client, request_num, encoded_image, encoded_mask)
+            for request_num in range(CONCURRENT_REQUESTS)
+        ))
+
+if __name__ == "__main__":  # start the load test only when run as a script
+    asyncio.run(main())
--- a/tests/test_image.png
+++ b/tests/test_image.png
--- a/tests/test_mask.png
+++ b/tests/test_mask.png