환경 설정 파일에서 세션 및 워커 수를 조정하고, API 에러 로그 기록 방식을 개선하였습니다. 대시보드에서 스케일 및 세션 이벤트를 시각화할 수 있는 기능을 추가하였으며, 관련 HTML 구조를 업데이트하였습니다. 서버 로그에서 프로세스 ID 변경 사항을 반영하였습니다.

2025-10-02 03:25:40 +00:00 · 2025-10-02 03:25:40 +00:00 · 47ba96e148
parent cebf7612c6
commit 47ba96e148
15 changed files with 281668 additions and 159 deletions
--- a/.env
+++ b/.env
@ -37,13 +37,13 @@ JETSON_CPU_FREQ=1900  # MHz
 JETSON_MEMORY_FREQ=1600  # MHz
 # 세션 풀 설정 (시스템별 자동 조정)
-SIMPLE_LAMA_SESSIONS=4
+SIMPLE_LAMA_SESSIONS=6
-MIGAN_SESSIONS=4
+MIGAN_SESSIONS=6
-REMBG_SESSIONS=1
+REMBG_SESSIONS=3
 # 워커 설정 (Jetson은 더 적은 워커 사용)
-MAX_WORKERS=8  # Jetson: 4, x86: 8
+MAX_WORKERS=12  # Jetson: 6, x86: 12
-MIN_WORKERS=2  # Jetson: 1, x86: 2
+MIN_WORKERS=6  # Jetson: 3, x86: 4
 WORKER_TIMEOUT=300
 # VRAM 관리 (Jetson은 더 보수적인 설정)
--- a/app/api/endpoints.py
+++ b/app/api/endpoints.py
@ -157,6 +157,11 @@ async def health_check(request: Request):
        version="1.0.0"
    )
 # 호환용: 일부 클라이언트에서 /health 경로로 접근하는 경우가 있어 동일 응답 제공
@router.get("/health", response_model=HealthResponse, include_in_schema=False)
 async def health_check_compat(request: Request):
    """Serve the health-check response on the compatibility path ``/health``.

    Delegates to ``health_check`` and returns its result unchanged. The route
    is registered with ``include_in_schema=False``, so it is left out of the
    generated API schema.
    """
    return await health_check(request)
@router.get("/api/v1/server-config", response_model=ServerConfigResponse)
 async def get_server_config():
@ -292,6 +297,7 @@ async def inpaint_image(
        )
    except HTTPException:
        # HTTPException은 상세 사유를 에러 로그에 남길 수 있도록 재전파됨 (미들웨어에서 잡혀 JSONL 기록)
        raise
    except Exception as e:
        logger.error(f"인페인팅 처리 실패: {e}")
@ -396,6 +402,7 @@ async def remove_background(
            })
    except HTTPException:
        # HTTPException은 상세 사유를 에러 로그에 남길 수 있도록 재전파됨
        raise
    except Exception as e:
        logger.error(f"배경 제거 처리 실패: {e}")
@ -524,7 +531,7 @@ async def root():
        "message": "인페인팅 서버 API (iopaint 호환)",
        "version": "1.0.0",
        "docs": "/docs",
-        "health": "/health"
+        "health": "/api/v1/health"
    }
--- a/app/core/config.py
+++ b/app/core/config.py
@ -89,15 +89,15 @@ class Settings(BaseSettings):
    # =========================
    # 동적 세션 풀/메모리
    # =========================
-    SIMPLE_LAMA_MIN_SESSIONS: int = 2
+    SIMPLE_LAMA_MIN_SESSIONS: int = 4
-    SIMPLE_LAMA_MAX_SESSIONS: int = 4
+    SIMPLE_LAMA_MAX_SESSIONS: int = 8
    # x86에서는 MIGAN 미로딩(지연 로딩) 기본 → MIN=0
-    MIGAN_MIN_SESSIONS: int = 2 if IS_JETSON else 1
+    MIGAN_MIN_SESSIONS: int = 4 if IS_JETSON else 1
-    MIGAN_MAX_SESSIONS: int = 4
+    MIGAN_MAX_SESSIONS: int = 8
-    REMBG_MIN_SESSIONS: int = 2 if IS_JETSON else 1
+    REMBG_MIN_SESSIONS: int = 3 if IS_JETSON else 1
-    REMBG_MAX_SESSIONS: int = 4 if IS_JETSON else 3
+    REMBG_MAX_SESSIONS: int = 6 if IS_JETSON else 4
    # 여유 VRAM 비율(남은 VRAM이 이 값보다 커야 세션 추가)
    SESSION_VRAM_THRESHOLD: float = 0.30
@ -105,8 +105,8 @@ class Settings(BaseSettings):
    # 마이크로 배치(SimpleLAMA)
    USE_MICRO_BATCHING: bool = True
-    MICRO_BATCH_SIZE: int = 4
+    MICRO_BATCH_SIZE: int = 8
-    MICRO_BATCH_TIMEOUT_MS: int = 100
+    MICRO_BATCH_TIMEOUT_MS: int = 80
    # 사전 확정 세션(플랫폼 감안 기본치)
    SIMPLE_LAMA_SESSIONS: int = 4
@ -114,8 +114,8 @@ class Settings(BaseSettings):
    REMBG_SESSIONS: int = 3 if IS_JETSON else 2
    # 워커(내부 큐/스레드 워커, 프로세스는 WORKERS)
-    MAX_WORKERS: int = 4 if IS_JETSON else 8
+    MAX_WORKERS: int = 6 if IS_JETSON else 12
-    MIN_WORKERS: int = 1 if IS_JETSON else 4
+    MIN_WORKERS: int = 3 if IS_JETSON else 4
    WORKER_TIMEOUT: int = 120
    # =========================
@ -123,7 +123,7 @@ class Settings(BaseSettings):
    # =========================
    VRAM_THRESHOLD_HIGH: float = 0.70 if IS_JETSON else 0.80
    VRAM_THRESHOLD_LOW: float = 0.30 if IS_JETSON else 0.40
-    VRAM_CHECK_INTERVAL: int = 20 if IS_JETSON else 15  # 초
+    VRAM_CHECK_INTERVAL: int = 10 if IS_JETSON else 5  # 초
    # =========================
    # 모델/경로
--- a/app/core/session_pool.py
+++ b/app/core/session_pool.py
@ -14,6 +14,7 @@ from collections import defaultdict
 from ..core.config import settings
 from ..utils.gpu_monitor import gpu_monitor
 from ..utils.monitor_events import append_event
 logger = logging.getLogger(__name__)
@ -118,6 +119,16 @@ class SessionPool:
            )
            logger.info(f"Successfully created session {session_id}")
            self._log_pool_status("create", model_type.value)
            try:
                append_event({
                    "type": "session",
                    "action": "create",
                    "model": model_type.value,
                    "session_id": session_id,
                    "pool_size": len(self.pools[model_type]) + 1,
                })
            except Exception:
                pass
            return session
        except Exception as e:
            logger.error(f"Failed to create session {session_id}: {e}", exc_info=True)
@ -253,6 +264,15 @@ class SessionPool:
                            reaped_counts[session.model_type.value] += 1
                            del session.model
                            del session
                            try:
                                append_event({
                                    "type": "session",
                                    "action": "reap",
                                    "model": model_type.value,
                                    "pool_size": len(pool),
                                })
                            except Exception:
                                pass
                        self.conditions[model_type].notify_all()
--- a/app/core/worker_manager.py
+++ b/app/core/worker_manager.py
@ -16,6 +16,7 @@ from ..utils.gpu_monitor import gpu_monitor
 from ..core.config import settings
 from ..core.stats_manager import stats_manager
 from ..core.session_pool import ModelType
 from ..utils.monitor_events import append_event
 logger = logging.getLogger(__name__)
@ -230,12 +231,32 @@ class WorkerManager:
            await self._scale_workers(new_count)
            self.last_scale_time = current_time
            logger.info(f"Scaled up to {new_count} workers (VRAM: {vram_usage:.2f})")
            try:
                append_event({
                    "type": "worker_scale",
                    "action": "up",
                    "new_count": new_count,
                    "queue_size": queue_size,
                    "vram_usage": vram_usage,
                })
            except Exception:
                pass
        elif should_scale_down:
            new_count = max(total_workers - 1, settings.MIN_WORKERS)
            await self._scale_workers(new_count)
            self.last_scale_time = current_time
            logger.info(f"Scaled down to {new_count} workers (VRAM: {vram_usage:.2f})")
            try:
                append_event({
                    "type": "worker_scale",
                    "action": "down",
                    "new_count": new_count,
                    "queue_size": queue_size,
                    "vram_usage": vram_usage,
                })
            except Exception:
                pass
    async def _scale_workers(self, target_count: int):
        """워커 수를 조정합니다."""
--- a/app/monitoring/dashboard.py
+++ b/app/monitoring/dashboard.py
@ -243,8 +243,9 @@ class MonitoringData:
                "gpu": gpu_info,
                "system_memory": system_memory,
                "system_performance": system_performance,
-                "workers": worker_status,
+                # status.json 스냅샷 외에 실시간 상태를 병합
-                "sessions": session_status,
+                "workers": worker_manager.get_status() or worker_status,
                "sessions": session_pool.get_status() or session_status,
                "jetson": jetson_info,
                "api_stats": api_stats,
                "model_performance_stats": model_performance_stats,
@ -984,6 +985,17 @@ HTML_TEMPLATE = """
            </div>
        </div>
        <!-- 스케일/세션 타임라인 -->
        <div class="card">
            <h3>📈 워커·세션 타임라인</h3>
            <div id="scale-timeline" style="font-family:'Courier New',monospace;font-size:12px;background:#f8f9fa;border-radius:6px;padding:10px;max-height:220px;overflow:auto;">
                로딩 중...
            </div>
            <div style="margin-top:8px;text-align:right;">
                <button onclick="refreshTimeline()" style="padding:5px 12px;">새로고침</button>
            </div>
        </div>
        <!-- 최근 에러 -->
        <div class="card">
            <h3>🚨 최근 API 에러</h3>
@ -1117,7 +1129,8 @@ HTML_TEMPLATE = """
        function connectWebSocket() {
            try {
-                ws = new WebSocket(`ws://${window.location.host}/ws`);
+                const proto = (window.location.protocol === 'https:') ? 'wss' : 'ws';
                ws = new WebSocket(`${proto}://${window.location.host}/ws`);
                ws.onopen = function() {
                    console.log('WebSocket 연결이 성공했습니다.');
@ -1418,7 +1431,8 @@ HTML_TEMPLATE = """
                const path = e.path || '-';
                const status = e.status != null ? e.status : '-';
                const rt = e.response_time_ms != null ? e.response_time_ms : '-';
-                return `<div class="error-row"><div>${ts}</div><div>${method}</div><div>${status}</div><div>${path}</div><div>${rt}</div></div>`;
+                const ip = e.client_ip ? ` <span style='color:#888'>(IP: ${e.client_ip})</span>` : '';
                return `<div class="error-row"><div>${ts}</div><div>${method}</div><div>${status}</div><div>${path}${ip}</div><div>${rt}</div></div>`;
            }).join('');
            container.innerHTML = rows;
        }
@ -1705,6 +1719,32 @@ HTML_TEMPLATE = """
                });
        }
        function renderTimeline(events) {
            // Render worker-scale / session events, newest first, as plain text
            // lines inside the #scale-timeline container.
            const el = document.getElementById('scale-timeline');
            if (!Array.isArray(events) || events.length === 0) {
                el.innerHTML = '<div style="color:#999;">이벤트 없음</div>';
                return;
            }
            // This script is embedded in a Python string literal. In a regular
            // (non-raw) literal a backslash escape for a line feed is expanded to
            // a real line break, which would split the JS string literal and break
            // the whole script, so the separator is built without any backslash.
            const NEWLINE = String.fromCharCode(10);
            const rows = events.slice().reverse().map(ev => {
                const ts = ev.timestamp ? new Date(ev.timestamp*1000).toLocaleTimeString() : '';
                // Records without an action must not abort the whole rendering.
                const action = String(ev.action || '').toUpperCase();
                if (ev.type === 'worker_scale') {
                    return `[${ts}] WORKERS ${action} -> ${ev.new_count} (queue=${ev.queue_size}, vram=${(ev.vram_usage*100||0).toFixed(1)}%)`;
                }
                if (ev.type === 'session') {
                    return `[${ts}] SESSION ${action} (${ev.model}) size=${ev.pool_size}`;
                }
                return `[${ts}] ${ev.type}`;
            }).join(NEWLINE);
            // textContent keeps event data from being parsed as HTML; pre-wrap
            // makes each event show on its own line instead of collapsing.
            el.style.whiteSpace = 'pre-wrap';
            el.textContent = rows;
        }
        function refreshTimeline() {
            // Fetch the recent scale/session events and pass them to
            // renderTimeline; any failure in the chain shows an error notice.
            const showFailure = () => {
                const box = document.getElementById('scale-timeline');
                box.innerHTML = '<div style="color:#dc3545;">타임라인 로딩 실패</div>';
            };
            const showEvents = (payload) => renderTimeline(payload.events || []);
            fetch('/api/scale-events')
                .then((resp) => resp.json())
                .then(showEvents)
                .catch(showFailure);
        }
        // 페이지 로드 시 초기화
        document.addEventListener('DOMContentLoaded', function() {
            // 로그 및 성능 통계 초기 로딩
@ -2014,6 +2054,16 @@ async def get_system_alerts():
        logger.error(f"시스템 알림 조회 실패: {e}")
        return {"alerts": [], "error": str(e)}
@api_router.get("/scale-events")
 def get_scale_events():
    """최근 스케일/세션 이벤트를 반환"""
    try:
        from ..utils.monitor_events import read_recent_events
        return {"events": read_recent_events(limit=300)}
    except Exception as e:
        logger.error(f"타임라인 조회 실패: {e}")
        return {"events": [], "error": str(e)}
@api_router.get("/errors", summary="최근 API 에러 목록")
 def get_recent_errors(limit: int = 50):
    """최근 API 에러를 반환합니다 (logs/api_errors.jsonl 기반)."""
@ -2297,3 +2347,29 @@ if __name__ == "__main__":
        port=settings.MONITORING_PORT,
        log_level="info"
    )
 # --- 외부 런처용: 로그에 시간 포함하여 실행 ---
 def _get_uvicorn_log_config():
    try:
        from uvicorn.config import LOGGING_CONFIG as DEFAULT
        import copy
        cfg = copy.deepcopy(DEFAULT)
        # 포맷에 시간 추가
        for fmt in ("default", "access"):
            if fmt in cfg.get("formatters", {}):
                cfg["formatters"][fmt]["format"] = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
        return cfg
    except Exception:
        return None
 def run_monitor(host: str = "0.0.0.0", port: int = None):
    """Run the monitoring server with timestamped log output.

    Args:
        host: Interface to bind to.
        port: Port to listen on; a falsy value falls back to
            ``settings.MONITORING_PORT``.
    """
    listen_port = settings.MONITORING_PORT if not port else port
    uvicorn.run(
        monitor_app,
        host=host,
        port=listen_port,
        log_level="info",
        log_config=_get_uvicorn_log_config(),
    )
--- a/app/utils/api_error_log.py
+++ b/app/utils/api_error_log.py
@ -0,0 +1,84 @@
 """
 API 에러 로깅 유틸리티 (JSONL 기록 + 로테이션 + 클라이언트 IP 추출)
 """
 from __future__ import annotations
 import os
 import time
 import json
 import re
 from typing import Dict, Any
 from fastapi import Request
 # Directory holding the log files; a relative path.
 LOG_DIR = "logs"
 # Created as soon as this module is imported; no error if it already exists.
 os.makedirs(LOG_DIR, exist_ok=True)
 # Active JSONL file that receives one API error record per line.
 API_ERROR_LOG_PATH = os.path.join(LOG_DIR, "api_errors.jsonl")
 # Size at which the active file is rotated.
 API_ERROR_MAX_BYTES = 10 * 1024 * 1024  # 10MB
 # Number of rotated files kept; older ones are deleted during rotation.
 API_ERROR_BACKUP_COUNT = 5
 def _rotate_if_needed() -> None:
    """Rotate the API error log once it reaches ``API_ERROR_MAX_BYTES``.

    The active file is renamed to ``api_errors_<timestamp>.jsonl`` and only
    the ``API_ERROR_BACKUP_COUNT`` most recently modified rotated files are
    kept. Every failure is swallowed.
    """
    try:
        if not os.path.exists(API_ERROR_LOG_PATH):
            return
        if os.path.getsize(API_ERROR_LOG_PATH) < API_ERROR_MAX_BYTES:
            return
        stamp = time.strftime("%Y%m%d-%H%M%S")
        os.replace(API_ERROR_LOG_PATH, os.path.join(LOG_DIR, f"api_errors_{stamp}.jsonl"))
        backups = sorted(
            (
                os.path.join(LOG_DIR, name)
                for name in os.listdir(LOG_DIR)
                if name.startswith("api_errors_") and name.endswith(".jsonl")
            ),
            key=lambda candidate: os.path.getmtime(candidate),
            reverse=True,
        )
        # Newest first, so everything past the keep-count is stale.
        for stale in backups[API_ERROR_BACKUP_COUNT:]:
            try:
                os.remove(stale)
            except Exception:
                pass
    except Exception:
        # A failed rotation is not fatal, so it is ignored.
        pass
 def append_api_error_log(record: Dict[str, Any]) -> None:
    """Append ``record`` to the API error log as one JSON line.

    Rotation is attempted first. Any failure (rotation, serialization, file
    I/O) is swallowed so that logging never raises into the caller.
    """
    try:
        _rotate_if_needed()
        with open(API_ERROR_LOG_PATH, "a", encoding="utf-8") as log_file:
            serialized = json.dumps(record, ensure_ascii=False)
            log_file.write(serialized + "\n")
    except Exception:
        pass
 def extract_client_ip(request: Request) -> str:
    """Return the best-guess client IP for ``request``.

    Sources are consulted in this order, and the first non-empty value wins:
    the first entry of ``X-Forwarded-For``, ``X-Real-IP``, the first ``for=``
    parameter of ``Forwarded``, and finally ``request.client.host``.

    Args:
        request: Incoming request exposing ``headers`` and ``client``.

    Returns:
        The extracted address, or an empty string when no source yields one
        or any error occurs. The value is returned as received from the
        headers; no validation is applied.
    """
    try:
        headers = request.headers
        xff = headers.get("x-forwarded-for") or headers.get("X-Forwarded-For")
        if xff:
            first_ip = xff.split(",")[0].strip()
            if first_ip:
                return first_ip
        xri = headers.get("x-real-ip") or headers.get("X-Real-IP")
        if xri:
            real_ip = xri.strip()
            # A blank header must not mask the remaining sources.
            if real_ip:
                return real_ip
        fwd = headers.get("forwarded") or headers.get("Forwarded")
        if fwd:
            # RFC 7239 parameter names are case-insensitive ("For=" is valid).
            m = re.search(r"for=([^;,\s]+)", fwd, re.IGNORECASE)
            if m:
                candidate = m.group(1).strip('"')
                if candidate:
                    return candidate
        if request.client and request.client.host:
            return request.client.host
    except Exception:
        # Best effort: IP extraction must never break request handling.
        pass
    return ""
 def get_content_length(request: Request) -> int:
    """Return the request's declared ``Content-Length`` as an integer.

    Returns 0 when the header is absent, cannot be parsed as an integer, or
    any other error occurs while reading it.
    """
    try:
        headers = request.headers
        raw = headers.get("content-length")
        if not raw:
            raw = headers.get("Content-Length")
        return 0 if raw is None else int(raw)
    except Exception:
        return 0
--- a/app/utils/monitor_events.py
+++ b/app/utils/monitor_events.py
@ -0,0 +1,69 @@
 """
 경량 모니터링 이벤트(JSONL) 기록 및 읽기 유틸
 - worker 스케일 업/다운
 - 세션 생성/회수
 """
 from __future__ import annotations
 import os
 import time
 import json
 from typing import Dict, Any, List
 # Directory holding the event log files; a relative path.
 LOG_DIR = "logs"
 # Created as soon as this module is imported; no error if it already exists.
 os.makedirs(LOG_DIR, exist_ok=True)
 # Active JSONL file that receives one monitoring event per line.
 EVENT_LOG_PATH = os.path.join(LOG_DIR, "scale_events.jsonl")
 # Size above which the active file is rotated.
 MAX_BYTES = 10 * 1024 * 1024  # 10MB
 # Number of rotated files kept; older ones are deleted during rotation.
 BACKUP = 10
 def _rotate_if_needed():
    """Rotate the event log once it grows beyond ``MAX_BYTES``.

    The active file is renamed to ``scale_events_<timestamp>.jsonl`` and only
    the ``BACKUP`` most recently modified rotated files are kept. Every
    failure is swallowed so that event logging stays best-effort.
    """
    try:
        if os.path.exists(EVENT_LOG_PATH) and os.path.getsize(EVENT_LOG_PATH) > MAX_BYTES:
            ts = time.strftime("%Y%m%d-%H%M%S")
            os.replace(EVENT_LOG_PATH, os.path.join(LOG_DIR, f"scale_events_{ts}.jsonl"))
            # Only files this function can have produced are pruning
            # candidates; other files sharing the prefix are left alone.
            rotated = [
                os.path.join(LOG_DIR, f) for f in os.listdir(LOG_DIR)
                if f.startswith("scale_events_") and f.endswith(".jsonl")
            ]
            rotated.sort(key=lambda p: os.path.getmtime(p), reverse=True)
            for p in rotated[BACKUP:]:
                try:
                    os.remove(p)
                except Exception:
                    pass
    except Exception:
        pass
 def append_event(event: Dict[str, Any]) -> None:
    """Append ``event`` to the event log as one JSON line.

    A ``"timestamp"`` (``time.time()``) is added to the written record when
    the event does not carry one. The caller's dict is not modified. Any
    failure (rotation, serialization, file I/O) is swallowed.

    Args:
        event: JSON-serializable mapping describing the event.
    """
    try:
        _rotate_if_needed()
        if "timestamp" in event:
            record = event
        else:
            # Stamp a copy so the caller's dict is left untouched.
            record = {**event, "timestamp": time.time()}
        with open(EVENT_LOG_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    except Exception:
        pass
 def read_recent_events(limit: int = 300) -> List[Dict[str, Any]]:
    """Return the most recent events from the active event log, oldest first.

    Only the last ``limit`` lines of the file are considered; blank lines and
    lines that are not valid JSON among them are skipped.

    Args:
        limit: Maximum number of trailing lines to read. A value of zero or
            less yields an empty list.

    Returns:
        The parsed events, or an empty list when the file does not exist or
        any error occurs.
    """
    try:
        # A "[-0:]" slice would select every line, so a non-positive limit
        # is handled explicitly.
        if limit <= 0:
            return []
        if not os.path.exists(EVENT_LOG_PATH):
            return []
        from collections import deque

        with open(EVENT_LOG_PATH, "r", encoding="utf-8") as f:
            # A bounded deque keeps just the tail while streaming the file,
            # instead of materializing every line first.
            lines = deque(f, maxlen=limit)
        events: List[Dict[str, Any]] = []
        for line in lines:
            line = line.strip()
            if not line:
                continue
            try:
                events.append(json.loads(line))
            except Exception:
                # Skip lines that cannot be parsed as JSON.
                continue
        return events
    except Exception:
        return []
--- a/logs/main.log
+++ b/logs/main.log
--- a/logs/main_server.log
+++ b/logs/main_server.log
--- a/logs/main_server.pid
+++ b/logs/main_server.pid
@ -1 +1 @@
-192534
+271615
--- a/logs/monitoring.log
+++ b/logs/monitoring.log
@ -1,7 +1,69 @@
 WARNING:root:jtop library not found. Jetson monitoring will be limited. Please run 'sudo pip install jetson-stats'
-INFO:     Started server process [192741]
+INFO:     Started server process [271803]
 INFO:     Waiting for application startup.
 INFO:     Application startup complete.
 INFO:     Uvicorn running on http://0.0.0.0:8888 (Press CTRL+C to quit)
-INFO:     122.35.47.45:63439 - "WebSocket /ws" [accepted]
+INFO:     127.0.0.1:49994 - "GET /api/simple HTTP/1.1" 200 OK
 INFO:     118.235.73.64:35921 - "GET /api/logs?lines=50 HTTP/1.1" 200 OK
 INFO:     118.235.73.64:35815 - "GET /api/performance-stats HTTP/1.1" 200 OK
 INFO:     118.235.73.64:27910 - "GET /api/system-alerts HTTP/1.1" 200 OK
 INFO:     118.235.73.64:33163 - "GET /api/model-usage-stats HTTP/1.1" 200 OK
 INFO:     118.235.73.64:33780 - "WebSocket /ws" [accepted]
 INFO:     connection open
 INFO:     118.235.73.64:29039 - "GET /favicon.ico HTTP/1.1" 404 Not Found
 ERROR:app.monitoring.dashboard:데이터 전송 오류: 
 INFO:     connection closed
 INFO:     118.235.73.64:36365 - "WebSocket /ws" [accepted]
 ERROR:app.monitoring.dashboard:모델 성능 통계 조회 중 예외 발생: HTTPConnectionPool(host='0.0.0.0', port=8008): Read timed out. (read timeout=2)
 INFO:     connection open
 ERROR:app.monitoring.dashboard:모델 성능 통계 조회 중 예외 발생: HTTPConnectionPool(host='0.0.0.0', port=8008): Read timed out. (read timeout=2)
 INFO:     118.235.73.64:29969 - "GET /api/system-alerts HTTP/1.1" 200 OK
 INFO:     118.235.73.64:36188 - "GET /api/errors HTTP/1.1" 200 OK
 INFO:     118.235.73.64:34776 - "GET /api/model-usage-stats HTTP/1.1" 200 OK
 ERROR:app.monitoring.dashboard:모델 성능 통계 조회 중 예외 발생: HTTPConnectionPool(host='0.0.0.0', port=8008): Read timed out. (read timeout=2)
 INFO:     118.235.73.64:34776 - "GET /api/system-alerts HTTP/1.1" 200 OK
 ERROR:app.monitoring.dashboard:모델 성능 통계 조회 중 예외 발생: HTTPConnectionPool(host='0.0.0.0', port=8008): Read timed out. (read timeout=2)
 INFO:     118.235.73.64:36188 - "GET /api/errors HTTP/1.1" 200 OK
 INFO:     122.35.47.45:52268 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:36188 - "GET /api/logs?lines=50 HTTP/1.1" 200 OK
 INFO:     118.235.73.64:32011 - "GET /api/model-usage-stats HTTP/1.1" 200 OK
 INFO:     118.235.73.64:29864 - "GET /api/performance-stats HTTP/1.1" 200 OK
 INFO:     118.235.73.64:33588 - "GET /api/system-alerts HTTP/1.1" 200 OK
 INFO:     118.235.73.64:31627 - "GET /api/errors HTTP/1.1" 200 OK
 INFO:     118.235.73.64:32554 - "GET /api/system-alerts HTTP/1.1" 200 OK
 INFO:     118.235.73.64:36432 - "GET /api/errors HTTP/1.1" 200 OK
 INFO:     118.235.73.64:36432 - "GET /api/model-usage-stats HTTP/1.1" 200 OK
 INFO:     118.235.73.64:36432 - "GET /api/system-alerts HTTP/1.1" 200 OK
 INFO:     118.235.73.64:31023 - "GET /api/errors HTTP/1.1" 200 OK
 ERROR:app.monitoring.dashboard:모델 성능 통계 조회 중 예외 발생: HTTPConnectionPool(host='0.0.0.0', port=8008): Read timed out. (read timeout=2)
 ERROR:app.monitoring.dashboard:모델 성능 통계 조회 중 예외 발생: HTTPConnectionPool(host='0.0.0.0', port=8008): Read timed out. (read timeout=2)
 INFO:     118.235.73.64:30785 - "GET /api/model-usage-stats HTTP/1.1" 200 OK
 INFO:     118.235.73.64:35188 - "GET /api/system-alerts HTTP/1.1" 200 OK
 INFO:     118.235.73.64:30117 - "GET /api/performance-stats HTTP/1.1" 200 OK
 INFO:     118.235.73.64:30102 - "GET /api/logs?lines=50 HTTP/1.1" 200 OK
 INFO:     118.235.73.64:36300 - "GET /api/errors HTTP/1.1" 200 OK
 INFO:     118.235.73.64:30102 - "GET / HTTP/1.1" 200 OK
 INFO:     connection closed
 INFO:     118.235.73.64:30102 - "GET /favicon.ico HTTP/1.1" 404 Not Found
 INFO:     118.235.73.64:34126 - "GET /favicon.ico HTTP/1.1" 404 Not Found
 ERROR:app.monitoring.dashboard:데이터 전송 오류: 
 INFO:     118.235.73.64:27884 - "GET / HTTP/1.1" 200 OK
 WARNING:  Invalid HTTP request received.
 WARNING:  Invalid HTTP request received.
 INFO:     118.235.73.64:30128 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:30128 - "GET /favicon.ico HTTP/1.1" 404 Not Found
 INFO:     118.235.73.64:33038 - "GET /favicon.ico HTTP/1.1" 404 Not Found
 INFO:     118.235.73.64:34281 - "GET /favicon.ico HTTP/1.1" 404 Not Found
 INFO:     118.235.73.64:28205 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:28205 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:30339 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:30775 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:32870 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:28161 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:33203 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:37097 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:36624 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:30357 - "GET / HTTP/1.1" 200 OK
 INFO:     118.235.73.64:35740 - "GET / HTTP/1.1" 200 OK
 INFO:     122.35.47.45:50214 - "GET / HTTP/1.1" 200 OK
 INFO:     122.35.47.45:50214 - "GET / HTTP/1.1" 200 OK
--- a/logs/monitoring.pid
+++ b/logs/monitoring.pid
@ -1 +1 @@
-192741
+271803
--- a/main.py
+++ b/main.py
@ -7,9 +7,11 @@ import time
 import logging
 import json
 import asyncio
 import re
 from contextlib import asynccontextmanager
 from collections import defaultdict, deque
 from fastapi import FastAPI, Request, Response
 from fastapi import HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 import uvicorn
@ -21,6 +23,11 @@ from app.monitoring.dashboard import monitor_app
 from app.core.batch_manager import batch_manager
 # from app.utils.background_task import manage_state_background # TODO: 경로 확인 필요
 from app.utils.discord_notifier import send_discord_notification
 from app.utils.api_error_log import (
    append_api_error_log,
    extract_client_ip,
    get_content_length,
 )
 # 로깅 설정
 import logging.handlers
@ -177,6 +184,7 @@ api_stats = APIStatsCollector()
 # - 주기적 폴링으로 인해 실제 비즈니스 엔드포인트 통계를 왜곡시키지 않기 위함
 EXCLUDED_ENDPOINTS = {
    "/api/v1/health",
    "/health",
    "/docs",
    "/openapi.json",
    "/redoc",
@ -196,34 +204,19 @@ API_ERROR_LOG_PATH = os.path.join(log_dir, "api_errors.jsonl")
 API_ERROR_MAX_BYTES = 10 * 1024 * 1024  # 10MB
 API_ERROR_BACKUP_COUNT = 5
-def _rotate_api_error_log_if_needed():
+def _append_error_record(request: Request, status: int, response_time: float, error: str | None = None):
-    try:
+    client_ip = extract_client_ip(request)
-        if os.path.exists(API_ERROR_LOG_PATH) and os.path.getsize(API_ERROR_LOG_PATH) >= API_ERROR_MAX_BYTES:
+    content_length = get_content_length(request)
-            ts = time.strftime("%Y%m%d-%H%M%S")
+    append_api_error_log({
-            rotated_path = os.path.join(log_dir, f"api_errors_{ts}.jsonl")
+        "timestamp": time.time(),
-            os.replace(API_ERROR_LOG_PATH, rotated_path)
+        "method": request.method,
-            # 오래된 로테이션 파일 정리 (최신 N개만 유지)
+        "path": request.url.path,
-            rotated = [
+        "status": status,
-                os.path.join(log_dir, f) for f in os.listdir(log_dir)
+        "error": error,
-                if f.startswith("api_errors_") and f.endswith(".jsonl")
+        "response_time_ms": int(response_time * 1000),
-            ]
+        "client_ip": client_ip,
-            rotated.sort(key=lambda p: os.path.getmtime(p), reverse=True)
+        "content_length": content_length,
-            for old in rotated[API_ERROR_BACKUP_COUNT:]:
+    })
                try:
                    os.remove(old)
                except Exception:
                    pass
    except Exception as e:  # pragma: no cover
        logger.warning(f"API 에러 로그 로테이션 실패: {e}")
 def _append_api_error_log(record: dict):
    """에러 전용 JSONL 로그에 한 줄 추가"""
    try:
        _rotate_api_error_log_if_needed()
        with open(API_ERROR_LOG_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    except Exception as e:  # pragma: no cover
        logger.warning(f"API 에러 로그 기록 실패: {e}")
 async def save_status_periodically():
    """주기적으로 워커와 세션 상태를 파일에 저장합니다."""
@ -346,6 +339,31 @@ app = FastAPI(
    lifespan=lifespan
 )
 # 업로드 용량 선검사 미들웨어 (Content-Length 기반)
@app.middleware("http")
 async def content_length_guard(request: Request, call_next):
    try:
        # 업로드가 포함될 가능성이 높은 POST/PUT/PATCH만 검사
        if request.method in {"POST", "PUT", "PATCH"}:
            max_bytes = settings.MAX_FILE_SIZE  # MB 단위가 아닌 바이트로 받도록 설정되어 있음
            # settings.MAX_FILE_SIZE는 MB 단위였으나, 현재 코드에서는 바이트 사용.
            # 값이 과거 설정(바이트/MB 혼용)일 수 있어 1e4 이상이면 바이트로 간주, 작으면 MB로 간주하여 보정
            limit = max_bytes
            if limit < 1024 * 1024:  # MB일 가능성
                limit = max_bytes
            content_length = get_content_length(request)
            if content_length and content_length > limit:
                return Response(
                    content=json.dumps({
                        "detail": f"Request body too large: {content_length} bytes > limit {limit} bytes"
                    }),
                    status_code=413,
                    media_type="application/json",
                )
        return await call_next(request)
    except Exception:
        return await call_next(request)
 # API 통계 수집 미들웨어
@app.middleware("http")
 async def collect_api_stats(request: Request, call_next):
@ -375,15 +393,9 @@ async def collect_api_stats(request: Request, call_next):
        # 통계 업데이트
        api_stats.end_request(endpoint, success, response_time)
-        # 4xx/5xx는 에러 로그 파일에 기록
+        # 4xx/5xx는 에러 로그 파일에 기록 (클라이언트 IP 포함)
        if not success:
-            _append_api_error_log({
+            _append_error_record(request, response.status_code, response_time)
                "timestamp": time.time(),
                "method": request.method,
                "path": path,
                "status": response.status_code,
                "response_time_ms": int(response_time * 1000)
            })
        return response
@ -391,14 +403,7 @@ async def collect_api_stats(request: Request, call_next):
        # 에러 발생 시
        response_time = time.time() - start_time
        api_stats.end_request(endpoint, False, response_time, str(e))
-        _append_api_error_log({
+        _append_error_record(request, 500, response_time, str(e))
            "timestamp": time.time(),
            "method": request.method,
            "path": path,
            "status": 500,
            "error": str(e),
            "response_time_ms": int(response_time * 1000)
        })
        raise
 # CORS 미들웨어 추가
--- a/status.json
+++ b/status.json
@ -1,19 +1,47 @@
 {
  "worker_status": {
    "running": true,
-    "total_workers": 2,
+    "total_workers": 6,
    "queue_size": 0,
    "workers_by_status": {
      "idle": [
        {
-          "id": "worker_eac127f0",
+          "id": "worker_3e3d5864",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
          "last_task_at": null
        },
        {
-          "id": "worker_3fb51aaf",
+          "id": "worker_7373b97c",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
          "last_task_at": null
        },
        {
          "id": "worker_296ab3a4",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
          "last_task_at": null
        },
        {
          "id": "worker_891dcc94",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
          "last_task_at": null
        },
        {
          "id": "worker_411ad2e8",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
          "last_task_at": null
        },
        {
          "id": "worker_ab487451",
          "status": "idle",
          "task_count": 0,
          "error_count": 0,
@ -28,64 +56,75 @@
  },
  "session_status": {
    "simple_lama": {
-      "min": 2,
+      "min": 4,
-      "max": 4,
+      "max": 8,
-      "total": 2,
+      "total": 4,
      "in_use": 0,
-      "available": 2
+      "available": 4
    },
    "migan": {
      "min": 1,
-      "max": 4,
+      "max": 8,
      "total": 1,
      "in_use": 0,
      "available": 1
    },
    "rembg": {
      "min": 1,
-      "max": 3,
+      "max": 4,
      "total": 1,
      "in_use": 0,
      "available": 1
    }
  },
  "api_stats": {
-    "total_requests": 0,
+    "total_requests": 13467,
-    "successful_requests": 0,
+    "successful_requests": 13467,
    "failed_requests": 0,
-    "success_rate": 0.0,
+    "success_rate": 100.0,
-    "endpoint_usage": {},
+    "endpoint_usage": {
      "GET /api/v1/model": 6740,
      "POST /api/v1/inpaint": 6264,
      "POST /api/v1/run_plugin_gen_image": 463
    },
    "endpoint_stats": {
-      "POST /api/v1/inpaint": {
+      "GET /api/v1/model": {
-        "count": 0,
+        "count": 6740,
-        "avg_time": 0.0,
+        "avg_time": 0.001540846824645996,
-        "min_time": 0.0,
+        "min_time": 0.00063323974609375,
-        "max_time": 0.0,
+        "max_time": 0.004244089126586914,
        "current_concurrent": 0
      },
      "POST /api/v1/inpaint": {
        "count": 6264,
        "avg_time": 0.5280383849143981,
        "min_time": 0.2597086429595947,
        "max_time": 1.651228666305542,
        "current_concurrent": 5
      },
      "POST /api/v1/run_plugin_gen_image": {
        "count": 463,
        "avg_time": 0.4474348998069763,
        "min_time": 0.1340315341949463,
        "max_time": 2.5062549114227295,
        "current_concurrent": 1
      },
      "POST /api/v1/remove_bg": {
        "count": 0,
        "avg_time": 0.0,
        "min_time": 0.0,
        "max_time": 0.0,
        "current_concurrent": 0
      },
      "POST /api/v1/run_plugin_gen_image": {
        "count": 0,
        "avg_time": 0.0,
        "min_time": 0.0,
        "max_time": 0.0,
        "current_concurrent": 0
      }
    },
-    "average_response_time": 0,
+    "average_response_time": 0.2757347109317779,
-    "min_response_time": 0,
+    "min_response_time": 0.0005586147308349609,
-    "max_response_time": 0,
+    "max_response_time": 2.8400533199310303,
-    "current_concurrent": 0,
+    "current_concurrent": 6,
-    "max_concurrent": 0,
+    "max_concurrent": 12,
-    "requests_per_second": 0.0,
+    "requests_per_second": 1.3742163667312552,
-    "uptime": 5.007766008377075,
+    "uptime": 9799.76685333252,
    "recent_errors": []
  },
-  "timestamp": 1759306699.0522153
+  "timestamp": 1759375540.6194317
 }