인페인팅 및 배경 제거 API에 요청 ID 및 클라이언트 IP 로깅 기능을 추가하였으며, 이미지 및 마스크의 메타 정보를 로그에 기록하도록 개선하였습니다. 배치 처리 시 배치 ID를 부여하고, VRAM 사용량 및 세션 상태를 보다 상세히 로깅하도록 수정하였습니다. GPU 메모리 정보 반환 형식을 통일하고, OpenCV 최적화를 비활성화(`cv2.setUseOptimized(False)`)하였습니다. 설정 파일에서 SimpleLama 최대 세션 수(6→8), VRAM 임계값(0.30→0.12), 마이크로 배치 크기(4→8)를 조정하였습니다.

2025-10-04 12:30:59 +00:00 · 2025-10-04 12:30:59 +00:00 · 9619fbc1db
parent 1d09de91e2
commit 9619fbc1db
8 changed files with 221 additions and 99 deletions
--- a/app/api/endpoints.py
+++ b/app/api/endpoints.py
@ -23,6 +23,7 @@ from ..utils.image_utils import (
    pil_to_bytes, numpy_to_bytes, adjust_mask, gen_frontend_mask
 )
 import base64
 import uuid
 import io
 from ..monitoring.dashboard import monitoring_data
 from .stats import router as stats_router
@ -237,16 +238,83 @@ async def get_server_config():
 async def inpaint_image(
    request: InpaintRequest,
    response_format: ResponseFormat = Query(ResponseFormat.binary, description="응답 형식 (기존 클라이언트 호환을 위해 기본값: binary)"),
-    image_format: ImageFormat = Query(ImageFormat.png, description="이미지 형식")
+    image_format: ImageFormat = Query(ImageFormat.png, description="이미지 형식"),
    http_request: Request = None,
 ):
    """인페인팅 API (iopaint 호환)"""
    start_time = time.time()
    alpha_channel = None  # 변수 초기화
    try:
        req_id = f"req_{uuid.uuid4().hex[:8]}"
        client_ip = None
        try:
            if http_request and http_request.client:
                client_ip = http_request.client.host
        except Exception:
            client_ip = None
        # 원본 base64 크기(바이트) 계산
        try:
            raw_img_b64 = request.image.split(',', 1)[1] if isinstance(request.image, str) and ',' in request.image else request.image
            img_bytes_len = len(base64.b64decode(raw_img_b64)) if raw_img_b64 else 0
        except Exception:
            img_bytes_len = 0
        try:
            raw_mask_b64 = request.mask.split(',', 1)[1] if isinstance(request.mask, str) and ',' in request.mask else request.mask
            mask_bytes_len = len(base64.b64decode(raw_mask_b64)) if raw_mask_b64 else 0
        except Exception:
            mask_bytes_len = 0
        # base64 이미지 디코딩
        image, alpha_channel, info, ext = decode_base64_to_image(request.image)
-        mask, _, _, _ = decode_base64_to_image(request.mask, gray=True)
+        mask, _, mask_info, mask_ext = decode_base64_to_image(request.mask, gray=True)
        # 이미지/마스크 메타 로깅
        try:
            img_h, img_w = image.shape[:2]
            img_ch = image.shape[2] if len(image.shape) == 3 else 1
            meta_image = {
                "request_id": req_id,
                "client_ip": client_ip,
                "kind": "image",
                "format": info.get("format"),
                "mode": info.get("mode"),
                "ext": ext,
                "pil_size": info.get("size"),
                "np_shape": tuple(image.shape),
                "h": img_h,
                "w": img_w,
                "channels": img_ch,
                "dtype": str(image.dtype),
                "bytes": img_bytes_len,
                "has_alpha": alpha_channel is not None,
            }
            logger.info(f"[INPAINT_META] {meta_image}")
        except Exception:
            pass
        try:
            mask_h, mask_w = mask.shape[:2]
            mask_ch = mask.shape[2] if len(mask.shape) == 3 else 1
            meta_mask = {
                "request_id": req_id,
                "client_ip": client_ip,
                "kind": "mask",
                "format": mask_info.get("format") if isinstance(mask_info, dict) else None,
                "mode": mask_info.get("mode") if isinstance(mask_info, dict) else None,
                "ext": mask_ext if 'mask_ext' in locals() else None,
                "pil_size": mask_info.get("size") if isinstance(mask_info, dict) else None,
                "np_shape": tuple(mask.shape),
                "h": mask_h,
                "w": mask_w,
                "channels": mask_ch,
                "dtype": str(mask.dtype),
                "bytes": mask_bytes_len,
            }
            logger.info(f"[INPAINT_META] {meta_mask}")
        except Exception:
            pass
        # alpha_channel이 None인 경우 기본값 설정
        if alpha_channel is None:
@ -354,15 +422,54 @@ async def inpaint_image(
 async def remove_background(
    request: RemoveBGRequest,
    response_format: ResponseFormat = Query(ResponseFormat.base64, description="응답 형식"),
-    image_format: ImageFormat = Query(ImageFormat.png, description="이미지 형식")
+    image_format: ImageFormat = Query(ImageFormat.png, description="이미지 형식"),
    http_request: Request = None,
 ):
    """배경 제거 API (iopaint 호환)"""
    start_time = time.time()
    alpha_channel = None  # 변수 초기화
    try:
        req_id = f"req_{uuid.uuid4().hex[:8]}"
        client_ip = None
        try:
            if http_request and http_request.client:
                client_ip = http_request.client.host
        except Exception:
            client_ip = None
        # 원본 base64 크기(바이트) 계산
        try:
            raw_img_b64 = request.image.split(',', 1)[1] if isinstance(request.image, str) and ',' in request.image else request.image
            img_bytes_len = len(base64.b64decode(raw_img_b64)) if raw_img_b64 else 0
        except Exception:
            img_bytes_len = 0
        # base64 이미지 디코딩
        image, alpha_channel, info, ext = decode_base64_to_image(request.image)
        # 이미지 메타 로깅
        try:
            img_h, img_w = image.shape[:2]
            img_ch = image.shape[2] if len(image.shape) == 3 else 1
            meta_image = {
                "request_id": req_id,
                "client_ip": client_ip,
                "kind": "image",
                "format": info.get("format"),
                "mode": info.get("mode"),
                "ext": ext,
                "pil_size": info.get("size"),
                "np_shape": tuple(image.shape),
                "h": img_h,
                "w": img_w,
                "channels": img_ch,
                "dtype": str(image.dtype),
                "bytes": img_bytes_len,
                "has_alpha": alpha_channel is not None,
            }
            logger.info(f"[REMOVEBG_META] {meta_image}")
        except Exception:
            pass
        # alpha_channel이 None인 경우 기본값 설정
        if alpha_channel is None:
--- a/app/core/batch_manager.py
+++ b/app/core/batch_manager.py
@ -105,10 +105,12 @@ class BatchManager:
                    break # 대기 시간 초과
            logger.info(f"Creating a new batch with {len(batch)} jobs.")
            # 배치 ID 부여
            batch_id = f"batch_{uuid.uuid4().hex[:8]}"
            # 배치를 처리할 별도의 태스크를 생성하여 루프가 다른 배치를 만드는 것을 막지 않도록 합니다.
-            asyncio.create_task(self._process_batch(batch))
+            asyncio.create_task(self._process_batch(batch, batch_id))
-    async def _process_batch(self, batch: List[BatchJob]):
+    async def _process_batch(self, batch: List[BatchJob], batch_id: str):
        """
        생성된 배치를 WorkerManager에 전달하여 처리하고 결과를 전파합니다.
        """
@ -117,7 +119,7 @@ class BatchManager:
        try:
            # WorkerManager에 배치 처리를 요청합니다.
            # worker_manager의 process_inpaint는 이제 배치 데이터를 처리할 수 있어야 합니다.
-            results = await worker_manager.process_inpaint_batch(batch_data)
+            results = await worker_manager.process_inpaint_batch(batch_data, batch_id=batch_id)
            if len(results) != len(batch):
                raise ValueError(f"Result count ({len(results)}) does not match batch size ({len(batch)}).")
@ -128,7 +130,7 @@ class BatchManager:
                    job.future.set_exception(result)
                else:
                    job.future.set_result(result)
-            logger.info(f"Successfully processed batch of {len(batch)} jobs.")
+            logger.info(f"✅ Batch Completed (id={batch_id}, size={len(batch)})")
        except Exception as e:
            logger.error(f"Failed to process batch: {e}", exc_info=True)
--- a/app/core/config.py
+++ b/app/core/config.py
@ -90,7 +90,7 @@ class Settings(BaseSettings):
    # 동적 세션 풀/메모리
    # =========================
    SIMPLE_LAMA_MIN_SESSIONS: int = 4
-    SIMPLE_LAMA_MAX_SESSIONS: int = 6
+    SIMPLE_LAMA_MAX_SESSIONS: int = 8
    # x86에서는 MIGAN 미로딩(지연 로딩) 기본 → MIN=0
    MIGAN_MIN_SESSIONS: int = 2 if IS_JETSON else 1
@ -100,12 +100,12 @@ class Settings(BaseSettings):
    REMBG_MAX_SESSIONS: int = 6
    # 여유 VRAM 비율(남은 VRAM이 이 값보다 커야 세션 추가)
-    SESSION_VRAM_THRESHOLD: float = 0.30
+    SESSION_VRAM_THRESHOLD: float = 0.12
    SESSION_IDLE_TIMEOUT: int = 1800  # 초 (0이면 비활성)
    # 마이크로 배치(SimpleLAMA)
    USE_MICRO_BATCHING: bool = True
-    MICRO_BATCH_SIZE: int = 4
+    MICRO_BATCH_SIZE: int = 8
    MICRO_BATCH_TIMEOUT_MS: int = 100
    # 사전 확정 세션(플랫폼 감안 기본치)
--- a/app/core/session_pool.py
+++ b/app/core/session_pool.py
@ -91,7 +91,16 @@ class SessionPool:
            if not gpu_info or 'used' not in gpu_info:
                vram_usage = "VRAM: N/A"
            else:
-                vram_usage = f"VRAM: {(gpu_info['used'] / 1024):.1f}/{(gpu_info['total'] / 1024):.1f} GB ({gpu_info['usage_percent']:.1f}%)"
+                unit = gpu_info.get('unit', '')
                used = gpu_info.get('used', 0)
                total = gpu_info.get('total', 0)
                usage_percent = gpu_info.get('usage_percent', 0)
                if unit == 'MiB':
                    used_gb = used / 1024.0
                    total_gb = total / 1024.0
                    vram_usage = f"VRAM: {used_gb:.1f}/{total_gb:.1f} GB ({usage_percent:.1f}%)"
                else:
                    vram_usage = f"VRAM: {used:.1f}/{total:.1f} {unit or 'GiB'} ({usage_percent:.1f}%)"
            session_counts = ", ".join([f"{mt.value}: {len(p)}" for mt, p in self.pools.items()])
@ -175,7 +184,9 @@ class SessionPool:
                    if not session.in_use:
                        session.in_use = True
                        session.mark_used()
-                        logger.debug(f"Acquired existing session {session.session_id}")
+                        total = len(self.pools[model_type])
                        in_use = sum(1 for s in self.pools[model_type] if s.in_use)
                        logger.info(f"[{model_type.value}] acquire {session.session_id} (in_use={in_use}/{total})")
                        return session
                if len(self.pools[model_type]) < max_sessions:
@ -202,7 +213,15 @@ class SessionPool:
                            logger.error(f"New session creation failed for {model_type.value}. Will wait for an existing session.")
                            pass
                    else:
-                        logger.warning(f"Cannot create new session for {model_type.value}. VRAM threshold not met. (Free: {free_vram_ratio:.2f} <= Threshold: {settings.SESSION_VRAM_THRESHOLD:.2f})")
+                        unit = gpu_mem_info.get("unit", "")
                        used = gpu_mem_info.get("used", 0)
                        total = gpu_mem_info.get("total", 0)
                        usage_percent = gpu_mem_info.get("usage_percent", 0)
                        logger.warning(
                            f"Cannot create new session for {model_type.value}. VRAM threshold not met. "
                            f"(Free ratio: {free_vram_ratio:.2f} <= Threshold: {settings.SESSION_VRAM_THRESHOLD:.2f}, "
                            f"VRAM: {used:.2f}/{total:.2f} {unit} ({usage_percent:.1f}%))"
                        )
                logger.debug(f"No available sessions or VRAM for {model_type.value}, waiting...")
                await condition.wait()
@ -211,7 +230,9 @@ class SessionPool:
        condition = self.conditions[session.model_type]
        async with condition:
            session.in_use = False
-            logger.debug(f"Released session {session.session_id}")
+            total = len(self.pools[session.model_type])
            in_use = sum(1 for s in self.pools[session.model_type] if s.in_use)
            logger.info(f"[{session.model_type.value}] release {session.session_id} (in_use={in_use}/{total})")
            condition.notify()
    def get_status(self) -> dict:
--- a/app/core/worker_manager.py
+++ b/app/core/worker_manager.py
@ -364,7 +364,7 @@ class WorkerManager:
        # _execute_task 대신 직접 실행
        return await _inpaint()
-    async def process_inpaint_batch(self, batch_data: List[Dict[str, Any]]) -> List[np.ndarray]:
+    async def process_inpaint_batch(self, batch_data: List[Dict[str, Any]], batch_id: str | None = None) -> List[np.ndarray]:
        """SimpleLama 배치 인페인팅 작업을 처리합니다."""
        if not batch_data:
            return []
@ -377,8 +377,16 @@ class WorkerManager:
        async with session_pool.get_session(model_type) as session:
-            vram_before = gpu_monitor.get_gpu_memory_info().get('used', 0)
+            vram_info = gpu_monitor.get_gpu_memory_info()
-            logger.info(f"🧠[{stats_model_key}] Batch Inference Start (Size: {batch_size}). VRAM: {(vram_before / 1024):.1f} GB")
+            used = vram_info.get('used', 0)
            total = vram_info.get('total', 0)
            unit = vram_info.get('unit', '')
            usage_percent = vram_info.get('usage_percent', 0)
            session_id = getattr(session, 'session_id', 'unknown')
            logger.info(
                f"🧠[simple_lama][{session_id}] Batch Start (id={batch_id or '-'}, size={batch_size}) "
                f"VRAM: {used:.1f}/{total:.1f} {unit or 'GiB'} ({usage_percent:.1f}%)"
            )
            start_time = time.time()
@ -393,8 +401,16 @@ class WorkerManager:
            duration = time.time() - start_time
-            vram_after = gpu_monitor.get_gpu_memory_info().get('used', 0)
+            vram_info_after = gpu_monitor.get_gpu_memory_info()
-            logger.info(f"✅[{stats_model_key}] Batch Inference End (Size: {batch_size}). VRAM: {(vram_after / 1024):.1f} GB | Duration: {duration:.3f}s")
+            used_after = vram_info_after.get('used', 0)
            total_after = vram_info_after.get('total', 0)
            unit_after = vram_info_after.get('unit', '')
            usage_percent_after = vram_info_after.get('usage_percent', 0)
            logger.info(
                f"✅[simple_lama][{session_id}] Batch End (id={batch_id or '-'}, size={batch_size}) "
                f"VRAM: {used_after:.1f}/{total_after:.1f} {unit_after or 'GiB'} ({usage_percent_after:.1f}%) | "
                f"Duration: {duration:.3f}s"
            )
            # 통계 기록: 배치 전체 처리 시간 / 배치 크기
            stats_manager.record_time(stats_model_key, duration / batch_size, count=batch_size)
--- a/app/models/migan.py
+++ b/app/models/migan.py
@ -10,8 +10,8 @@ import numpy as np
 import onnxruntime as ort
 from PIL import Image
-# OpenCV 내부 최적화 활성화 (리사이즈/컬러변환 가속)
+# OpenCV 내부 최적화 off 
-cv2.setUseOptimized(True)
+cv2.setUseOptimized(False)
 logger = logging.getLogger(__name__)
@ -69,10 +69,6 @@ class MiganInpainter:
                import onnxruntime as ort
                so = ort.SessionOptions()
                try:
                    so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
                except Exception:
                    pass
                if self.intra_threads > 0:
                    so.intra_op_num_threads = self.intra_threads
                if self.inter_threads > 0:
--- a/app/models/simple_lama.py
+++ b/app/models/simple_lama.py
@ -10,7 +10,6 @@ from typing import Union, Tuple, List
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 from simple_lama_inpainting import SimpleLama
 from ..core.config import settings
 # 사용하지 않는 import 정리
 # from ..utils.image_utils import (
 #     decode_base64_to_image,
@ -52,42 +51,8 @@ class SimpleLamaInpainter:
                logger.info("fallback 모드로 전환합니다...")
                self._model = {"type": "simple_lama_fallback", "device": self._device, "fp16": self._fp16}
            # CUDA 최적화 적용
            try:
                if self._device.type == 'cuda':
                    # 입력 크기 고정 시 커널 탐색 최적화
                    torch.backends.cudnn.benchmark = True  # type: ignore[attr-defined]
                    # 내부 torch 모델에 channels_last 적용 (가중치는 FP32 유지)
                    model_module = getattr(self._model, "model", None)
                    if model_module is not None:
                        try:
                            model_module = model_module.to(self._device, memory_format=torch.channels_last)
                        except Exception:
                            model_module = model_module.to(self._device)
                        try:
                            model_module = model_module.eval()
                        except Exception:
                            pass
                        self._model.model = model_module
            except Exception as e:
                logger.warning(f"SimpleLama CUDA 최적화 적용 중 경고: {e}")
            self.loaded = True
            logger.info("Simple LAMA model loaded successfully")
            # 1회 워밍업: 최초 요청에서의 지연 방지
            try:
                if self._device.type == 'cuda' and hasattr(self._model, 'model'):
                    with torch.no_grad():
                        img = torch.zeros(1, 3, 512, 512, device=self._device, dtype=torch.float32)
                        msk = torch.ones(1, 1, 512, 512, device=self._device, dtype=torch.float32)
                        _ = self._model.model(img, msk)
                        # 두 번째 가벼운 호출로 알고리즘/캐시 고정
                        _ = self._model.model(img, msk)
                    logger.info("SimpleLama 워밍업 완료")
            except Exception as e:
                logger.warning(f"SimpleLama 워밍업 스킵: {e}")
        except Exception as e:
            logger.error(f"Failed to load Simple LAMA model: {e}")
@ -162,9 +127,9 @@ class SimpleLamaInpainter:
        if not self.is_ready:
            raise RuntimeError("SimpleLama model is not loaded yet.")
-        # 모델이 GPU에 있는지 확인 (불필요한 empty_cache 제거 → 성능 향상)
+        # 모델이 GPU에 있는지 확인
-        # if self._device.type != 'cpu':
+        if self._device.type != 'cpu':
-        #     torch.cuda.empty_cache()
+            torch.cuda.empty_cache()
        # 전처리
        pil_images = [Image.fromarray(img) for img in images]
@ -177,34 +142,24 @@ class SimpleLamaInpainter:
            preprocessed_images.append(img_tensor)
            preprocessed_masks.append(mask_tensor)
-        image_batch = torch.stack(preprocessed_images)
+        # 고정 크기 입력이므로 pinned memory + non_blocking 복사 최적화
-        mask_batch = torch.stack(preprocessed_masks)
+        image_batch = torch.stack(preprocessed_images).pin_memory() if self._device.type == 'cuda' else torch.stack(preprocessed_images)
-
+        mask_batch = torch.stack(preprocessed_masks).pin_memory() if self._device.type == 'cuda' else torch.stack(preprocessed_masks)
-        # H2D 복사 최적화: pinned memory + non_blocking
+        image_batch = image_batch.to(self._device, non_blocking=True)
-        if self._device.type != 'cpu':
+        mask_batch = mask_batch.to(self._device, non_blocking=True)
            try:
                image_batch = image_batch.pin_memory().to(self._device, non_blocking=True)
                mask_batch = mask_batch.pin_memory().to(self._device, non_blocking=True)
            except Exception:
                image_batch = image_batch.to(self._device)
                mask_batch = mask_batch.to(self._device)
        # 원본 이미지와 사이즈 저장
        original_images_and_sizes = list(zip(pil_images, [img.size for img in pil_images]))
        # 모델 호출
        logger.info(f"실제 SimpleLama 모델로 {len(images)}개 이미지 인페인팅 수행")
        # 성능 최적화: AMP + cuDNN benchmark
        torch.backends.cudnn.benchmark = True
        with torch.no_grad():
-            use_autocast = (self._device.type == 'cuda') and (self._fp16 or getattr(settings, "USE_FP16", False))
+            if self._device.type == 'cuda':
-            if use_autocast:
+                with torch.cuda.amp.autocast(enabled=True):
                # 추론 시에만 FP16 autocast 사용 (가중치는 FP32 유지)
                try:
                    with torch.amp.autocast('cuda', dtype=torch.float16):  # type: ignore[attr-defined]
                        inpainted_batch = self._model.model(image_batch, mask_batch)
                except Exception:
                    inpainted_batch = self._model.model(image_batch, mask_batch)
            else:
                # 라이브러리의 __call__ 대신 내부 torch 모델을 직접 호출
                inpainted_batch = self._model.model(image_batch, mask_batch)
        # 후처리
@ -235,10 +190,8 @@ class SimpleLamaInpainter:
    def _postprocess(self, tensor: torch.Tensor, original_size: Tuple[int, int], original_image: Image.Image, original_mask: Image.Image) -> Image.Image:
        """모델 출력 텐서를 PIL 이미지로 후처리하고 원본에 합성합니다."""
        # 텐서를 PIL 이미지로 변환
-        result_np = tensor.permute(1, 2, 0).detach().float().cpu().numpy()
+        result_np = tensor.permute(1, 2, 0).cpu().numpy()
-        # NaN/Inf 안전 처리 후 범위 클램프
+        result_np = np.clip(result_np * 255, 0, 255).astype(np.uint8)
        result_np = np.nan_to_num(result_np, nan=0.0, posinf=1.0, neginf=0.0)
        result_np = (np.clip(result_np, 0.0, 1.0) * 255.0).astype(np.uint8)
        inpainted_image_512 = Image.fromarray(result_np)
        # 원본 크기로 리사이즈
--- a/app/utils/gpu_monitor.py
+++ b/app/utils/gpu_monitor.py
@ -476,31 +476,58 @@ class GPUMonitor:
            self.initialized = True
    def get_gpu_memory_info(self, device_id: int = 0) -> Dict[str, float]:
-        """GPU 메모리 정보를 반환합니다."""
+        """GPU 메모리 정보를 반환합니다.
        반환 형식 통일:
          - keys: total, used, free, usage_percent, free_ratio, unit
          - unit: "GiB" 또는 "MiB"
        """
        if self.is_jetson:
-            return self.jetson_monitor.get_gpu_memory_info()
+            info = self.jetson_monitor.get_gpu_memory_info()
            # Jetson 쪽 반환 값에 누락된 키 보정 및 단위 명시
            if info:
                total = info.get("total", 0)
                used = info.get("used", 0)
                free = info.get("free", 0)
                # Jetson 경로는 MiB 기반으로 동작하도록 통일
                unit = "MiB"
                # 일부 fallback 경로는 GB를 반환할 수 있어 값이 작으면 GB로 간주 → MiB로 변환
                if total and total < 100:  # 100 GiB 미만이면 GB일 가능성
                    total, used, free = total * 1024, used * 1024, free * 1024
                usage_percent = info.get("usage_percent", (used / total * 100) if total else 0)
                free_ratio = info.get("free_ratio", (free / total) if total else 0)
                return {
                    "total": total,
                    "used": used,
                    "free": free,
                    "usage_percent": round(float(usage_percent), 2),
                    "free_ratio": round(float(free_ratio), 4),
                    "unit": unit,
                }
            return {"total": 0, "used": 0, "free": 0, "usage_percent": 0, "free_ratio": 0, "unit": "MiB"}
        if not self.initialized or not NVML_AVAILABLE:
-            return {"total": 0, "used": 0, "free": 0, "usage_percent": 0}
+            return {"total": 0, "used": 0, "free": 0, "usage_percent": 0, "free_ratio": 0, "unit": "GiB"}
        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
-            total = mem_info.total / 1024**3  # GB
+            total_gib = mem_info.total / 1024**3
-            used = mem_info.used / 1024**3   # GB
+            used_gib = mem_info.used / 1024**3
-            free = mem_info.free / 1024**3   # GB
+            free_gib = mem_info.free / 1024**3
-            usage_percent = (used / total) * 100
+            usage_percent = (used_gib / total_gib) * 100 if total_gib else 0
-            
+
            return {
-                "total": round(total, 2),
+                "total": round(total_gib, 2),
-                "used": round(used, 2),
+                "used": round(used_gib, 2),
-                "free": round(free, 2),
+                "free": round(free_gib, 2),
-                "usage_percent": round(usage_percent, 2)
+                "usage_percent": round(usage_percent, 2),
                "free_ratio": round((free_gib / total_gib), 4) if total_gib else 0,
                "unit": "GiB",
            }
        except Exception as e:
            logger.error(f"Error getting GPU memory info: {e}")
-            return {"total": 0, "used": 0, "free": 0, "usage_percent": 0}
+            return {"total": 0, "used": 0, "free": 0, "usage_percent": 0, "free_ratio": 0, "unit": "GiB"}
    def get_gpu_utilization(self, device_id: int = 0) -> float:
        """GPU 사용률을 반환합니다."""