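인페인팅 및 배경 제거 API에 요청 ID 및 클라이언트 IP 로깅 기능을 추가하였으며, 이미지 및 마스크의 메타 정보를 로그에 기록하도록 개선하였습니다. 배치 처리 시 배치 ID를 부여하고, VRAM 사용량 및 세션 상태를 보다 상세히 로깅하도록 수정하였습니다. GPU 메모리 정보 반환 형식을 통일하고, OpenCV 최적화를 비활성화하였습니다. 설정 파일에서 최대 세션 수 및 VRAM 임계값을 조정하였습니다. 또한 마이크로 배치 크기를 4에서 8로 늘렸고, SimpleLama의 CUDA 최적화(cuDNN benchmark, channels_last)와 워밍업, 출력 텐서의 NaN/Inf 보정을 제거하고 추론 전 torch.cuda.empty_cache() 호출을 복원하였으며, MIGAN의 ONNX Runtime 그래프 최적화 수준 명시 설정을 제거하였습니다.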

This commit is contained in:
vast 2025-10-04 12:30:59 +00:00
parent 1d09de91e2
commit 9619fbc1db
8 changed files with 221 additions and 99 deletions

View File

@ -23,6 +23,7 @@ from ..utils.image_utils import (
pil_to_bytes, numpy_to_bytes, adjust_mask, gen_frontend_mask pil_to_bytes, numpy_to_bytes, adjust_mask, gen_frontend_mask
) )
import base64 import base64
import uuid
import io import io
from ..monitoring.dashboard import monitoring_data from ..monitoring.dashboard import monitoring_data
from .stats import router as stats_router from .stats import router as stats_router
@ -237,16 +238,83 @@ async def get_server_config():
async def inpaint_image( async def inpaint_image(
request: InpaintRequest, request: InpaintRequest,
response_format: ResponseFormat = Query(ResponseFormat.binary, description="응답 형식 (기존 클라이언트 호환을 위해 기본값: binary)"), response_format: ResponseFormat = Query(ResponseFormat.binary, description="응답 형식 (기존 클라이언트 호환을 위해 기본값: binary)"),
image_format: ImageFormat = Query(ImageFormat.png, description="이미지 형식") image_format: ImageFormat = Query(ImageFormat.png, description="이미지 형식"),
http_request: Request = None,
): ):
"""인페인팅 API (iopaint 호환)""" """인페인팅 API (iopaint 호환)"""
start_time = time.time() start_time = time.time()
alpha_channel = None # 변수 초기화 alpha_channel = None # 변수 초기화
try: try:
req_id = f"req_{uuid.uuid4().hex[:8]}"
client_ip = None
try:
if http_request and http_request.client:
client_ip = http_request.client.host
except Exception:
client_ip = None
# 원본 base64 크기(바이트) 계산
try:
raw_img_b64 = request.image.split(',', 1)[1] if isinstance(request.image, str) and ',' in request.image else request.image
img_bytes_len = len(base64.b64decode(raw_img_b64)) if raw_img_b64 else 0
except Exception:
img_bytes_len = 0
try:
raw_mask_b64 = request.mask.split(',', 1)[1] if isinstance(request.mask, str) and ',' in request.mask else request.mask
mask_bytes_len = len(base64.b64decode(raw_mask_b64)) if raw_mask_b64 else 0
except Exception:
mask_bytes_len = 0
# base64 이미지 디코딩 # base64 이미지 디코딩
image, alpha_channel, info, ext = decode_base64_to_image(request.image) image, alpha_channel, info, ext = decode_base64_to_image(request.image)
mask, _, _, _ = decode_base64_to_image(request.mask, gray=True) mask, _, mask_info, mask_ext = decode_base64_to_image(request.mask, gray=True)
# 이미지/마스크 메타 로깅
try:
img_h, img_w = image.shape[:2]
img_ch = image.shape[2] if len(image.shape) == 3 else 1
meta_image = {
"request_id": req_id,
"client_ip": client_ip,
"kind": "image",
"format": info.get("format"),
"mode": info.get("mode"),
"ext": ext,
"pil_size": info.get("size"),
"np_shape": tuple(image.shape),
"h": img_h,
"w": img_w,
"channels": img_ch,
"dtype": str(image.dtype),
"bytes": img_bytes_len,
"has_alpha": alpha_channel is not None,
}
logger.info(f"[INPAINT_META] {meta_image}")
except Exception:
pass
try:
mask_h, mask_w = mask.shape[:2]
mask_ch = mask.shape[2] if len(mask.shape) == 3 else 1
meta_mask = {
"request_id": req_id,
"client_ip": client_ip,
"kind": "mask",
"format": mask_info.get("format") if isinstance(mask_info, dict) else None,
"mode": mask_info.get("mode") if isinstance(mask_info, dict) else None,
"ext": mask_ext if 'mask_ext' in locals() else None,
"pil_size": mask_info.get("size") if isinstance(mask_info, dict) else None,
"np_shape": tuple(mask.shape),
"h": mask_h,
"w": mask_w,
"channels": mask_ch,
"dtype": str(mask.dtype),
"bytes": mask_bytes_len,
}
logger.info(f"[INPAINT_META] {meta_mask}")
except Exception:
pass
# alpha_channel이 None인 경우 기본값 설정 # alpha_channel이 None인 경우 기본값 설정
if alpha_channel is None: if alpha_channel is None:
@ -354,15 +422,54 @@ async def inpaint_image(
async def remove_background( async def remove_background(
request: RemoveBGRequest, request: RemoveBGRequest,
response_format: ResponseFormat = Query(ResponseFormat.base64, description="응답 형식"), response_format: ResponseFormat = Query(ResponseFormat.base64, description="응답 형식"),
image_format: ImageFormat = Query(ImageFormat.png, description="이미지 형식") image_format: ImageFormat = Query(ImageFormat.png, description="이미지 형식"),
http_request: Request = None,
): ):
"""배경 제거 API (iopaint 호환)""" """배경 제거 API (iopaint 호환)"""
start_time = time.time() start_time = time.time()
alpha_channel = None # 변수 초기화 alpha_channel = None # 변수 초기화
try: try:
req_id = f"req_{uuid.uuid4().hex[:8]}"
client_ip = None
try:
if http_request and http_request.client:
client_ip = http_request.client.host
except Exception:
client_ip = None
# 원본 base64 크기(바이트) 계산
try:
raw_img_b64 = request.image.split(',', 1)[1] if isinstance(request.image, str) and ',' in request.image else request.image
img_bytes_len = len(base64.b64decode(raw_img_b64)) if raw_img_b64 else 0
except Exception:
img_bytes_len = 0
# base64 이미지 디코딩 # base64 이미지 디코딩
image, alpha_channel, info, ext = decode_base64_to_image(request.image) image, alpha_channel, info, ext = decode_base64_to_image(request.image)
# 이미지 메타 로깅
try:
img_h, img_w = image.shape[:2]
img_ch = image.shape[2] if len(image.shape) == 3 else 1
meta_image = {
"request_id": req_id,
"client_ip": client_ip,
"kind": "image",
"format": info.get("format"),
"mode": info.get("mode"),
"ext": ext,
"pil_size": info.get("size"),
"np_shape": tuple(image.shape),
"h": img_h,
"w": img_w,
"channels": img_ch,
"dtype": str(image.dtype),
"bytes": img_bytes_len,
"has_alpha": alpha_channel is not None,
}
logger.info(f"[REMOVEBG_META] {meta_image}")
except Exception:
pass
# alpha_channel이 None인 경우 기본값 설정 # alpha_channel이 None인 경우 기본값 설정
if alpha_channel is None: if alpha_channel is None:

View File

@ -105,10 +105,12 @@ class BatchManager:
break # 대기 시간 초과 break # 대기 시간 초과
logger.info(f"Creating a new batch with {len(batch)} jobs.") logger.info(f"Creating a new batch with {len(batch)} jobs.")
# 배치 ID 부여
batch_id = f"batch_{uuid.uuid4().hex[:8]}"
# 배치를 처리할 별도의 태스크를 생성하여 루프가 다른 배치를 만드는 것을 막지 않도록 합니다. # 배치를 처리할 별도의 태스크를 생성하여 루프가 다른 배치를 만드는 것을 막지 않도록 합니다.
asyncio.create_task(self._process_batch(batch)) asyncio.create_task(self._process_batch(batch, batch_id))
async def _process_batch(self, batch: List[BatchJob]): async def _process_batch(self, batch: List[BatchJob], batch_id: str):
""" """
생성된 배치를 WorkerManager에 전달하여 처리하고 결과를 전파합니다. 생성된 배치를 WorkerManager에 전달하여 처리하고 결과를 전파합니다.
""" """
@ -117,7 +119,7 @@ class BatchManager:
try: try:
# WorkerManager에 배치 처리를 요청합니다. # WorkerManager에 배치 처리를 요청합니다.
# worker_manager의 process_inpaint는 이제 배치 데이터를 처리할 수 있어야 합니다. # worker_manager의 process_inpaint는 이제 배치 데이터를 처리할 수 있어야 합니다.
results = await worker_manager.process_inpaint_batch(batch_data) results = await worker_manager.process_inpaint_batch(batch_data, batch_id=batch_id)
if len(results) != len(batch): if len(results) != len(batch):
raise ValueError(f"Result count ({len(results)}) does not match batch size ({len(batch)}).") raise ValueError(f"Result count ({len(results)}) does not match batch size ({len(batch)}).")
@ -128,7 +130,7 @@ class BatchManager:
job.future.set_exception(result) job.future.set_exception(result)
else: else:
job.future.set_result(result) job.future.set_result(result)
logger.info(f"Successfully processed batch of {len(batch)} jobs.") logger.info(f"✅ Batch Completed (id={batch_id}, size={len(batch)})")
except Exception as e: except Exception as e:
logger.error(f"Failed to process batch: {e}", exc_info=True) logger.error(f"Failed to process batch: {e}", exc_info=True)

View File

@ -90,7 +90,7 @@ class Settings(BaseSettings):
# 동적 세션 풀/메모리 # 동적 세션 풀/메모리
# ========================= # =========================
SIMPLE_LAMA_MIN_SESSIONS: int = 4 SIMPLE_LAMA_MIN_SESSIONS: int = 4
SIMPLE_LAMA_MAX_SESSIONS: int = 6 SIMPLE_LAMA_MAX_SESSIONS: int = 8
# x86에서는 MIGAN 미로딩(지연 로딩) 기본 → MIN=0 # x86에서는 MIGAN 미로딩(지연 로딩) 기본 → MIN=0
MIGAN_MIN_SESSIONS: int = 2 if IS_JETSON else 1 MIGAN_MIN_SESSIONS: int = 2 if IS_JETSON else 1
@ -100,12 +100,12 @@ class Settings(BaseSettings):
REMBG_MAX_SESSIONS: int = 6 REMBG_MAX_SESSIONS: int = 6
# 여유 VRAM 비율(남은 VRAM이 이 값보다 커야 세션 추가) # 여유 VRAM 비율(남은 VRAM이 이 값보다 커야 세션 추가)
SESSION_VRAM_THRESHOLD: float = 0.30 SESSION_VRAM_THRESHOLD: float = 0.12
SESSION_IDLE_TIMEOUT: int = 1800 # 초 (0이면 비활성) SESSION_IDLE_TIMEOUT: int = 1800 # 초 (0이면 비활성)
# 마이크로 배치(SimpleLAMA) # 마이크로 배치(SimpleLAMA)
USE_MICRO_BATCHING: bool = True USE_MICRO_BATCHING: bool = True
MICRO_BATCH_SIZE: int = 4 MICRO_BATCH_SIZE: int = 8
MICRO_BATCH_TIMEOUT_MS: int = 100 MICRO_BATCH_TIMEOUT_MS: int = 100
# 사전 확정 세션(플랫폼 감안 기본치) # 사전 확정 세션(플랫폼 감안 기본치)

View File

@ -91,7 +91,16 @@ class SessionPool:
if not gpu_info or 'used' not in gpu_info: if not gpu_info or 'used' not in gpu_info:
vram_usage = "VRAM: N/A" vram_usage = "VRAM: N/A"
else: else:
vram_usage = f"VRAM: {(gpu_info['used'] / 1024):.1f}/{(gpu_info['total'] / 1024):.1f} GB ({gpu_info['usage_percent']:.1f}%)" unit = gpu_info.get('unit', '')
used = gpu_info.get('used', 0)
total = gpu_info.get('total', 0)
usage_percent = gpu_info.get('usage_percent', 0)
if unit == 'MiB':
used_gb = used / 1024.0
total_gb = total / 1024.0
vram_usage = f"VRAM: {used_gb:.1f}/{total_gb:.1f} GB ({usage_percent:.1f}%)"
else:
vram_usage = f"VRAM: {used:.1f}/{total:.1f} {unit or 'GiB'} ({usage_percent:.1f}%)"
session_counts = ", ".join([f"{mt.value}: {len(p)}" for mt, p in self.pools.items()]) session_counts = ", ".join([f"{mt.value}: {len(p)}" for mt, p in self.pools.items()])
@ -175,7 +184,9 @@ class SessionPool:
if not session.in_use: if not session.in_use:
session.in_use = True session.in_use = True
session.mark_used() session.mark_used()
logger.debug(f"Acquired existing session {session.session_id}") total = len(self.pools[model_type])
in_use = sum(1 for s in self.pools[model_type] if s.in_use)
logger.info(f"[{model_type.value}] acquire {session.session_id} (in_use={in_use}/{total})")
return session return session
if len(self.pools[model_type]) < max_sessions: if len(self.pools[model_type]) < max_sessions:
@ -202,7 +213,15 @@ class SessionPool:
logger.error(f"New session creation failed for {model_type.value}. Will wait for an existing session.") logger.error(f"New session creation failed for {model_type.value}. Will wait for an existing session.")
pass pass
else: else:
logger.warning(f"Cannot create new session for {model_type.value}. VRAM threshold not met. (Free: {free_vram_ratio:.2f} <= Threshold: {settings.SESSION_VRAM_THRESHOLD:.2f})") unit = gpu_mem_info.get("unit", "")
used = gpu_mem_info.get("used", 0)
total = gpu_mem_info.get("total", 0)
usage_percent = gpu_mem_info.get("usage_percent", 0)
logger.warning(
f"Cannot create new session for {model_type.value}. VRAM threshold not met. "
f"(Free ratio: {free_vram_ratio:.2f} <= Threshold: {settings.SESSION_VRAM_THRESHOLD:.2f}, "
f"VRAM: {used:.2f}/{total:.2f} {unit} ({usage_percent:.1f}%))"
)
logger.debug(f"No available sessions or VRAM for {model_type.value}, waiting...") logger.debug(f"No available sessions or VRAM for {model_type.value}, waiting...")
await condition.wait() await condition.wait()
@ -211,7 +230,9 @@ class SessionPool:
condition = self.conditions[session.model_type] condition = self.conditions[session.model_type]
async with condition: async with condition:
session.in_use = False session.in_use = False
logger.debug(f"Released session {session.session_id}") total = len(self.pools[session.model_type])
in_use = sum(1 for s in self.pools[session.model_type] if s.in_use)
logger.info(f"[{session.model_type.value}] release {session.session_id} (in_use={in_use}/{total})")
condition.notify() condition.notify()
def get_status(self) -> dict: def get_status(self) -> dict:

View File

@ -364,7 +364,7 @@ class WorkerManager:
# _execute_task 대신 직접 실행 # _execute_task 대신 직접 실행
return await _inpaint() return await _inpaint()
async def process_inpaint_batch(self, batch_data: List[Dict[str, Any]]) -> List[np.ndarray]: async def process_inpaint_batch(self, batch_data: List[Dict[str, Any]], batch_id: str | None = None) -> List[np.ndarray]:
"""SimpleLama 배치 인페인팅 작업을 처리합니다.""" """SimpleLama 배치 인페인팅 작업을 처리합니다."""
if not batch_data: if not batch_data:
return [] return []
@ -377,8 +377,16 @@ class WorkerManager:
async with session_pool.get_session(model_type) as session: async with session_pool.get_session(model_type) as session:
vram_before = gpu_monitor.get_gpu_memory_info().get('used', 0) vram_info = gpu_monitor.get_gpu_memory_info()
logger.info(f"🧠[{stats_model_key}] Batch Inference Start (Size: {batch_size}). VRAM: {(vram_before / 1024):.1f} GB") used = vram_info.get('used', 0)
total = vram_info.get('total', 0)
unit = vram_info.get('unit', '')
usage_percent = vram_info.get('usage_percent', 0)
session_id = getattr(session, 'session_id', 'unknown')
logger.info(
f"🧠[simple_lama][{session_id}] Batch Start (id={batch_id or '-'}, size={batch_size}) "
f"VRAM: {used:.1f}/{total:.1f} {unit or 'GiB'} ({usage_percent:.1f}%)"
)
start_time = time.time() start_time = time.time()
@ -393,8 +401,16 @@ class WorkerManager:
duration = time.time() - start_time duration = time.time() - start_time
vram_after = gpu_monitor.get_gpu_memory_info().get('used', 0) vram_info_after = gpu_monitor.get_gpu_memory_info()
logger.info(f"✅[{stats_model_key}] Batch Inference End (Size: {batch_size}). VRAM: {(vram_after / 1024):.1f} GB | Duration: {duration:.3f}s") used_after = vram_info_after.get('used', 0)
total_after = vram_info_after.get('total', 0)
unit_after = vram_info_after.get('unit', '')
usage_percent_after = vram_info_after.get('usage_percent', 0)
logger.info(
f"✅[simple_lama][{session_id}] Batch End (id={batch_id or '-'}, size={batch_size}) "
f"VRAM: {used_after:.1f}/{total_after:.1f} {unit_after or 'GiB'} ({usage_percent_after:.1f}%) | "
f"Duration: {duration:.3f}s"
)
# 통계 기록: 배치 전체 처리 시간 / 배치 크기 # 통계 기록: 배치 전체 처리 시간 / 배치 크기
stats_manager.record_time(stats_model_key, duration / batch_size, count=batch_size) stats_manager.record_time(stats_model_key, duration / batch_size, count=batch_size)

View File

@ -10,8 +10,8 @@ import numpy as np
import onnxruntime as ort import onnxruntime as ort
from PIL import Image from PIL import Image
# OpenCV 내부 최적화 활성화 (리사이즈/컬러변환 가속) # OpenCV 내부 최적화 off
cv2.setUseOptimized(True) cv2.setUseOptimized(False)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -69,10 +69,6 @@ class MiganInpainter:
import onnxruntime as ort import onnxruntime as ort
so = ort.SessionOptions() so = ort.SessionOptions()
try:
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
except Exception:
pass
if self.intra_threads > 0: if self.intra_threads > 0:
so.intra_op_num_threads = self.intra_threads so.intra_op_num_threads = self.intra_threads
if self.inter_threads > 0: if self.inter_threads > 0:

View File

@ -10,7 +10,6 @@ from typing import Union, Tuple, List
import asyncio import asyncio
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from simple_lama_inpainting import SimpleLama from simple_lama_inpainting import SimpleLama
from ..core.config import settings
# 사용하지 않는 import 정리 # 사용하지 않는 import 정리
# from ..utils.image_utils import ( # from ..utils.image_utils import (
# decode_base64_to_image, # decode_base64_to_image,
@ -52,42 +51,8 @@ class SimpleLamaInpainter:
logger.info("fallback 모드로 전환합니다...") logger.info("fallback 모드로 전환합니다...")
self._model = {"type": "simple_lama_fallback", "device": self._device, "fp16": self._fp16} self._model = {"type": "simple_lama_fallback", "device": self._device, "fp16": self._fp16}
# CUDA 최적화 적용
try:
if self._device.type == 'cuda':
# 입력 크기 고정 시 커널 탐색 최적화
torch.backends.cudnn.benchmark = True # type: ignore[attr-defined]
# 내부 torch 모델에 channels_last 적용 (가중치는 FP32 유지)
model_module = getattr(self._model, "model", None)
if model_module is not None:
try:
model_module = model_module.to(self._device, memory_format=torch.channels_last)
except Exception:
model_module = model_module.to(self._device)
try:
model_module = model_module.eval()
except Exception:
pass
self._model.model = model_module
except Exception as e:
logger.warning(f"SimpleLama CUDA 최적화 적용 중 경고: {e}")
self.loaded = True self.loaded = True
logger.info("Simple LAMA model loaded successfully") logger.info("Simple LAMA model loaded successfully")
# 1회 워밍업: 최초 요청에서의 지연 방지
try:
if self._device.type == 'cuda' and hasattr(self._model, 'model'):
with torch.no_grad():
img = torch.zeros(1, 3, 512, 512, device=self._device, dtype=torch.float32)
msk = torch.ones(1, 1, 512, 512, device=self._device, dtype=torch.float32)
_ = self._model.model(img, msk)
# 두 번째 가벼운 호출로 알고리즘/캐시 고정
_ = self._model.model(img, msk)
logger.info("SimpleLama 워밍업 완료")
except Exception as e:
logger.warning(f"SimpleLama 워밍업 스킵: {e}")
except Exception as e: except Exception as e:
logger.error(f"Failed to load Simple LAMA model: {e}") logger.error(f"Failed to load Simple LAMA model: {e}")
@ -162,9 +127,9 @@ class SimpleLamaInpainter:
if not self.is_ready: if not self.is_ready:
raise RuntimeError("SimpleLama model is not loaded yet.") raise RuntimeError("SimpleLama model is not loaded yet.")
# 모델이 GPU에 있는지 확인 (불필요한 empty_cache 제거 → 성능 향상) # 모델이 GPU에 있는지 확인
# if self._device.type != 'cpu': if self._device.type != 'cpu':
# torch.cuda.empty_cache() torch.cuda.empty_cache()
# 전처리 # 전처리
pil_images = [Image.fromarray(img) for img in images] pil_images = [Image.fromarray(img) for img in images]
@ -177,34 +142,24 @@ class SimpleLamaInpainter:
preprocessed_images.append(img_tensor) preprocessed_images.append(img_tensor)
preprocessed_masks.append(mask_tensor) preprocessed_masks.append(mask_tensor)
image_batch = torch.stack(preprocessed_images) # 고정 크기 입력이므로 pinned memory + non_blocking 복사 최적화
mask_batch = torch.stack(preprocessed_masks) image_batch = torch.stack(preprocessed_images).pin_memory() if self._device.type == 'cuda' else torch.stack(preprocessed_images)
mask_batch = torch.stack(preprocessed_masks).pin_memory() if self._device.type == 'cuda' else torch.stack(preprocessed_masks)
# H2D 복사 최적화: pinned memory + non_blocking image_batch = image_batch.to(self._device, non_blocking=True)
if self._device.type != 'cpu': mask_batch = mask_batch.to(self._device, non_blocking=True)
try:
image_batch = image_batch.pin_memory().to(self._device, non_blocking=True)
mask_batch = mask_batch.pin_memory().to(self._device, non_blocking=True)
except Exception:
image_batch = image_batch.to(self._device)
mask_batch = mask_batch.to(self._device)
# 원본 이미지와 사이즈 저장 # 원본 이미지와 사이즈 저장
original_images_and_sizes = list(zip(pil_images, [img.size for img in pil_images])) original_images_and_sizes = list(zip(pil_images, [img.size for img in pil_images]))
# 모델 호출 # 모델 호출
logger.info(f"실제 SimpleLama 모델로 {len(images)}개 이미지 인페인팅 수행") logger.info(f"실제 SimpleLama 모델로 {len(images)}개 이미지 인페인팅 수행")
# 성능 최적화: AMP + cuDNN benchmark
torch.backends.cudnn.benchmark = True
with torch.no_grad(): with torch.no_grad():
use_autocast = (self._device.type == 'cuda') and (self._fp16 or getattr(settings, "USE_FP16", False)) if self._device.type == 'cuda':
if use_autocast: with torch.cuda.amp.autocast(enabled=True):
# 추론 시에만 FP16 autocast 사용 (가중치는 FP32 유지)
try:
with torch.amp.autocast('cuda', dtype=torch.float16): # type: ignore[attr-defined]
inpainted_batch = self._model.model(image_batch, mask_batch)
except Exception:
inpainted_batch = self._model.model(image_batch, mask_batch) inpainted_batch = self._model.model(image_batch, mask_batch)
else: else:
# 라이브러리의 __call__ 대신 내부 torch 모델을 직접 호출
inpainted_batch = self._model.model(image_batch, mask_batch) inpainted_batch = self._model.model(image_batch, mask_batch)
# 후처리 # 후처리
@ -235,10 +190,8 @@ class SimpleLamaInpainter:
def _postprocess(self, tensor: torch.Tensor, original_size: Tuple[int, int], original_image: Image.Image, original_mask: Image.Image) -> Image.Image: def _postprocess(self, tensor: torch.Tensor, original_size: Tuple[int, int], original_image: Image.Image, original_mask: Image.Image) -> Image.Image:
"""모델 출력 텐서를 PIL 이미지로 후처리하고 원본에 합성합니다.""" """모델 출력 텐서를 PIL 이미지로 후처리하고 원본에 합성합니다."""
# 텐서를 PIL 이미지로 변환 # 텐서를 PIL 이미지로 변환
result_np = tensor.permute(1, 2, 0).detach().float().cpu().numpy() result_np = tensor.permute(1, 2, 0).cpu().numpy()
# NaN/Inf 안전 처리 후 범위 클램프 result_np = np.clip(result_np * 255, 0, 255).astype(np.uint8)
result_np = np.nan_to_num(result_np, nan=0.0, posinf=1.0, neginf=0.0)
result_np = (np.clip(result_np, 0.0, 1.0) * 255.0).astype(np.uint8)
inpainted_image_512 = Image.fromarray(result_np) inpainted_image_512 = Image.fromarray(result_np)
# 원본 크기로 리사이즈 # 원본 크기로 리사이즈

View File

@ -476,31 +476,58 @@ class GPUMonitor:
self.initialized = True self.initialized = True
def get_gpu_memory_info(self, device_id: int = 0) -> Dict[str, float]:
    """Return GPU memory statistics in one uniform dictionary shape.

    Every return path yields the same keys: ``total``, ``used``, ``free``,
    ``usage_percent``, ``free_ratio`` and ``unit``.  ``unit`` is ``"MiB"`` on
    the Jetson path and ``"GiB"`` on the NVML path, so callers must consult
    it before formatting or comparing the absolute values.  Note that
    ``unit`` is a string even though the annotation says ``float``.

    Args:
        device_id: NVML device index.  Only used on the non-Jetson path.

    Returns:
        The statistics dictionary described above.  All numeric fields are
        zero when no information is available (empty Jetson report, NVML
        not initialized or unavailable, or an NVML query error).
    """

    def _empty(unit):
        # Single definition of the "nothing known" result so that all
        # fallback paths stay in sync with the documented key set.
        return {
            "total": 0,
            "used": 0,
            "free": 0,
            "usage_percent": 0,
            "free_ratio": 0,
            "unit": unit,
        }

    if self.is_jetson:
        info = self.jetson_monitor.get_gpu_memory_info()
        if not info:
            return _empty("MiB")

        # Treat explicit None the same as a missing key.
        total = info.get("total") or 0
        used = info.get("used") or 0
        free = info.get("free")
        if free is None:
            # A report without "free" used to be read as 0 free memory,
            # which made free_ratio 0 and looked like a full GPU.  Derive
            # it from total/used instead.
            free = max(total - used, 0)

        # The Jetson path is normalized to MiB.  Some fallback paths may
        # report GB instead; a total below 100 is taken to be GB and is
        # converted to MiB.
        if total and total < 100:
            total, used, free = total * 1024, used * 1024, free * 1024

        usage_percent = info.get("usage_percent")
        if usage_percent is None:
            usage_percent = (used / total * 100) if total else 0
        free_ratio = info.get("free_ratio")
        if free_ratio is None:
            free_ratio = (free / total) if total else 0

        return {
            "total": total,
            "used": used,
            "free": free,
            "usage_percent": round(float(usage_percent), 2),
            "free_ratio": round(float(free_ratio), 4),
            "unit": "MiB",
        }

    if not self.initialized or not NVML_AVAILABLE:
        return _empty("GiB")

    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
        mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)

        # NVML reports bytes; convert to GiB.
        total_gib = mem_info.total / 1024**3
        used_gib = mem_info.used / 1024**3
        free_gib = mem_info.free / 1024**3
        usage_percent = (used_gib / total_gib) * 100 if total_gib else 0

        return {
            "total": round(total_gib, 2),
            "used": round(used_gib, 2),
            "free": round(free_gib, 2),
            "usage_percent": round(usage_percent, 2),
            "free_ratio": round((free_gib / total_gib), 4) if total_gib else 0,
            "unit": "GiB",
        }
    except Exception as e:
        # Best-effort monitoring: log and fall back to the zeroed result
        # rather than propagating NVML errors to callers.
        logger.error(f"Error getting GPU memory info: {e}")
        return _empty("GiB")
def get_gpu_utilization(self, device_id: int = 0) -> float: def get_gpu_utilization(self, device_id: int = 0) -> float:
"""GPU 사용률을 반환합니다.""" """GPU 사용률을 반환합니다."""