# inpaintServer/app/monitoring/dashboard.py
#
# (File-viewer header captured with the paste: 921 lines, 34 KiB, Python)

"""
Worker monitoring dashboard.

Monitors worker status, GPU usage, and session-pool status in real time.
Supports both Jetson Xavier and x86 systems.
"""
import asyncio
import json
import logging
import time
import psutil
import os
from datetime import datetime, timedelta
from typing import Dict, List, Any
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse
import uvicorn
from ..core.worker_manager import worker_manager
from ..core.session_pool import session_pool
from ..utils.gpu_monitor import gpu_monitor
from ..core.config import settings
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# FastAPI application that serves the monitoring dashboard routes defined in
# this file. The title/description strings are runtime metadata and stay as-is.
monitor_app = FastAPI(
    title="인페인팅 서버 모니터링 대시보드",
    description="실시간 서버 상태 모니터링 (Jetson Xavier & x86 지원)",
    version="1.0.0"
)

# WebSocket clients that are currently connected; the /ws endpoint appends to
# this list and the broadcaster sends each snapshot to every entry.
connected_clients: List[WebSocket] = []
class MonitoringData:
    """Collects monitoring snapshots and keeps a bounded in-memory history.

    A snapshot combines GPU, system-memory, system-performance, worker,
    session-pool, Jetson, API-statistics and alert information. API
    statistics are fed in through :meth:`update_api_stats`.
    """

    def __init__(self):
        # Snapshots produced by collect_data(), oldest first.
        self.history: List[Dict[str, Any]] = []
        # Maximum number of snapshots kept in ``history``.
        self.max_history = 100
        # Running API counters; mutated by update_api_stats().
        self.api_stats = {
            "total_requests": 0,
            "successful_requests": 0,
            "failed_requests": 0,
            "endpoint_usage": {},
            "response_times": [],
            "errors": []
        }
        # Not read or written by any method of this class; kept so external
        # code that accesses the attribute keeps working.
        self.alerts = []

    async def collect_data(self) -> Dict[str, Any]:
        """Collect the current system state and append it to the history.

        Returns:
            The snapshot dict (the same object that is stored in ``history``).
        """
        timestamp = datetime.now().isoformat()

        # GPU information
        gpu_info = gpu_monitor.get_gpu_memory_info()
        gpu_utilization = gpu_monitor.get_gpu_utilization()

        # System memory information
        system_memory = gpu_monitor.get_system_memory_info()

        # _get_system_performance() samples CPU usage over a blocking
        # one-second interval. Running it in the default executor keeps the
        # event loop (and every connected WebSocket) responsive meanwhile.
        loop = asyncio.get_running_loop()
        system_performance = await loop.run_in_executor(
            None, self._get_system_performance
        )

        # Worker manager state
        worker_status = worker_manager.get_status()

        # Session pool state
        session_status = await session_pool.get_pool_status()

        # Jetson-only information
        jetson_info = {}
        if settings.IS_JETSON:
            jetson_info = gpu_monitor.get_jetson_specific_info()

        # API statistics
        api_stats = self._get_api_statistics()

        # Alerts and warnings
        alerts = self._check_alerts(gpu_info, system_memory, worker_status)

        data = {
            "timestamp": timestamp,
            "system_type": "Jetson Xavier" if settings.IS_JETSON else "x86_64",
            "gpu": {
                **gpu_info,
                "utilization": gpu_utilization
            },
            "system_memory": system_memory,
            "system_performance": system_performance,
            "workers": worker_status,
            "sessions": session_status,
            "jetson": jetson_info,
            "api_stats": api_stats,
            "alerts": alerts
        }

        # Append to the history and drop the oldest entries beyond the cap.
        self.history.append(data)
        overflow = len(self.history) - self.max_history
        if overflow > 0:
            del self.history[:overflow]
        return data

    def _get_system_performance(self) -> Dict[str, Any]:
        """Collect CPU, disk, network and process metrics via psutil.

        This call blocks for about one second because CPU usage is sampled
        with ``interval=1``.

        Returns:
            A dict with ``cpu``, ``disk``, ``network`` and ``processes``
            entries, or an empty dict if collection fails.
        """
        try:
            # CPU usage
            cpu_percent = psutil.cpu_percent(interval=1)
            cpu_count = psutil.cpu_count()
            cpu_freq = psutil.cpu_freq()
            # Disk I/O
            disk_io = psutil.disk_io_counters()
            # Network I/O
            net_io = psutil.net_io_counters()
            # Number of processes
            processes = len(psutil.pids())
            # System load; unavailable on some platforms, and getloadavg()
            # itself may raise OSError. Neither should discard the other
            # metrics, so fall back to zeros.
            try:
                load_avg = os.getloadavg()
            except (AttributeError, OSError):
                load_avg = [0, 0, 0]

            return {
                "cpu": {
                    "usage_percent": cpu_percent,
                    "count": cpu_count,
                    "frequency_mhz": cpu_freq.current if cpu_freq else 0,
                    "load_average": {
                        "1min": load_avg[0],
                        "5min": load_avg[1],
                        "15min": load_avg[2]
                    }
                },
                "disk": {
                    "read_bytes": disk_io.read_bytes if disk_io else 0,
                    "write_bytes": disk_io.write_bytes if disk_io else 0,
                    "read_count": disk_io.read_count if disk_io else 0,
                    "write_count": disk_io.write_count if disk_io else 0
                },
                "network": {
                    "bytes_sent": net_io.bytes_sent if net_io else 0,
                    "bytes_recv": net_io.bytes_recv if net_io else 0,
                    "packets_sent": net_io.packets_sent if net_io else 0,
                    "packets_recv": net_io.packets_recv if net_io else 0
                },
                "processes": processes
            }
        except Exception as e:
            # Best effort: the dashboard still renders without these metrics.
            logger.error(f"시스템 성능 정보 수집 실패: {e}")
            return {}

    def _get_api_statistics(self) -> Dict[str, Any]:
        """Return a point-in-time summary of the API counters.

        The counters are only populated through update_api_stats(); nothing
        in this class records requests on its own.
        """
        stats = self.api_stats
        total = stats["total_requests"]
        response_times = stats["response_times"]
        return {
            "total_requests": total,
            "successful_requests": stats["successful_requests"],
            "failed_requests": stats["failed_requests"],
            # max(..., 1) avoids dividing by zero before the first request.
            "success_rate": (stats["successful_requests"] / max(total, 1)) * 100,
            # Copy so snapshots kept in the history are not changed by later
            # update_api_stats() calls.
            "endpoint_usage": dict(stats["endpoint_usage"]),
            "average_response_time": (
                sum(response_times) / len(response_times)
                if response_times else 0
            ),
            # Five most recent errors
            "recent_errors": stats["errors"][-5:]
        }

    def _check_alerts(self, gpu_info: Dict, system_memory: Dict, worker_status: Dict) -> List[Dict]:
        """Inspect the given status dicts and build the list of alerts.

        Args:
            gpu_info: GPU memory info; ``usage_percent`` is read if present.
            system_memory: System memory info; ``usage_percent`` is read if
                present.
            worker_status: Worker state; ``active_workers`` is read if present.

        Returns:
            A list of alert dicts with ``level``, ``message``, ``timestamp``
            and ``category`` keys. Empty when nothing is wrong.
        """
        alerts = []
        now_iso = datetime.now().isoformat()

        def add_alert(level: str, message: str, category: str) -> None:
            alerts.append({
                "level": level,
                "message": message,
                "timestamp": now_iso,
                "category": category
            })

        # A missing or None metric is treated as 0 so it never raises here.
        gpu_usage = gpu_info.get("usage_percent", 0) or 0
        memory_usage = system_memory.get("usage_percent", 0) or 0

        # GPU memory: critical above 90 %, warning above 80 %.
        if gpu_usage > 80:
            add_alert(
                "critical" if gpu_usage > 90 else "warning",
                f"GPU 메모리 사용률이 높습니다: {gpu_usage:.1f}%",
                "gpu",
            )

        # System memory: critical above 90 %.
        if memory_usage > 90:
            add_alert(
                "critical",
                f"시스템 메모리 사용률이 높습니다: {memory_usage:.1f}%",
                "memory",
            )

        # Workers: critical when none are active (or the count is missing).
        if worker_status.get("active_workers", 0) == 0:
            add_alert("critical", "활성 워커가 없습니다", "workers")

        # TODO: Jetson temperature alerts are not implemented yet; they need
        # temperature readings that are not passed to this method.
        return alerts

    def update_api_stats(self, endpoint: str, success: bool, response_time: float, error: str = None):
        """Record the outcome of one API request.

        Args:
            endpoint: Endpoint identifier used as the key in the usage counts.
            success: Whether the request succeeded.
            response_time: Duration of the request; appended to a window of
                the last 100 values.
            error: Optional error text, stored only for failed requests.
        """
        stats = self.api_stats
        stats["total_requests"] += 1
        if success:
            stats["successful_requests"] += 1
        else:
            stats["failed_requests"] += 1
            if error:
                stats["errors"].append({
                    "timestamp": datetime.now().isoformat(),
                    "endpoint": endpoint,
                    "error": error
                })

        # Per-endpoint usage count
        usage = stats["endpoint_usage"]
        usage[endpoint] = usage.get(endpoint, 0) + 1

        # Response times: keep only the most recent 100.
        response_times = stats["response_times"]
        response_times.append(response_time)
        if len(response_times) > 100:
            del response_times[:-100]

        # Error log: keep only the most recent 50.
        errors = stats["errors"]
        if len(errors) > 50:
            del errors[:-50]

    def get_history(self) -> List[Dict[str, Any]]:
        """Return the stored snapshots (the live list, oldest first)."""
        return self.history

    @staticmethod
    def _average_metric(samples: List[Dict[str, Any]], section: str, key: str) -> float:
        """Average ``sample[section][key]`` over samples; missing or
        non-numeric values count as 0."""
        total = 0.0
        for sample in samples:
            value = (sample.get(section) or {}).get(key)
            if isinstance(value, (int, float)):
                total += value
        return total / len(samples)

    def get_statistics(self) -> Dict[str, Any]:
        """Return averages over the ten most recent snapshots.

        Returns:
            An empty dict when there is no history yet; otherwise the rounded
            averages of GPU memory usage, GPU utilization, system memory usage
            and active worker count, plus the number of snapshots averaged.
        """
        if not self.history:
            return {}

        recent_data = self.history[-10:]
        return {
            "gpu_usage_avg": round(self._average_metric(recent_data, "gpu", "usage_percent"), 2),
            "gpu_util_avg": round(self._average_metric(recent_data, "gpu", "utilization"), 2),
            "system_memory_avg": round(self._average_metric(recent_data, "system_memory", "usage_percent"), 2),
            "worker_avg": round(self._average_metric(recent_data, "workers", "active_workers"), 2),
            "data_points": len(recent_data)
        }


# Global monitoring data instance
monitoring_data = MonitoringData()
# HTML 템플릿
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>인페인팅 서버 모니터링</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
margin: 0;
padding: 20px;
background-color: #f5f5f5;
}
.container {
max-width: 1600px;
margin: 0 auto;
}
.header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
text-align: center;
}
.header h1 {
margin: 0;
font-size: 2.5em;
}
.header p {
margin: 10px 0 0 0;
opacity: 0.9;
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 20px;
margin-bottom: 20px;
}
.card {
background: white;
border-radius: 10px;
padding: 20px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
transition: transform 0.2s;
}
.card:hover {
transform: translateY(-2px);
}
.card h3 {
margin-top: 0;
color: #333;
border-bottom: 2px solid #667eea;
padding-bottom: 10px;
}
.metric {
display: flex;
justify-content: space-between;
align-items: center;
margin: 15px 0;
padding: 10px;
background: #f8f9fa;
border-radius: 5px;
}
.metric-label {
font-weight: 500;
color: #555;
}
.metric-value {
font-weight: bold;
font-size: 1.1em;
color: #667eea;
}
.chart-container {
background: white;
border-radius: 10px;
padding: 20px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
margin-bottom: 20px;
}
.chart-container h3 {
margin-top: 0;
color: #333;
border-bottom: 2px solid #667eea;
padding-bottom: 10px;
}
.alerts {
background: white;
border-radius: 10px;
padding: 20px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
margin-bottom: 20px;
}
.alert {
padding: 10px;
margin: 10px 0;
border-radius: 5px;
border-left: 4px solid;
}
.alert.critical {
background: #ffe6e6;
border-left-color: #dc3545;
color: #721c24;
}
.alert.warning {
background: #fff3cd;
border-left-color: #ffc107;
color: #856404;
}
.alert.info {
background: #d1ecf1;
border-left-color: #17a2b8;
color: #0c5460;
}
.endpoint-usage {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 10px;
margin-top: 15px;
}
.endpoint-item {
background: #f8f9fa;
padding: 10px;
border-radius: 5px;
text-align: center;
}
.endpoint-name {
font-weight: bold;
color: #333;
}
.endpoint-count {
color: #667eea;
font-size: 1.2em;
}
.system-info {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 15px;
}
.status-indicator {
display: inline-block;
width: 12px;
height: 12px;
border-radius: 50%;
margin-right: 8px;
}
.status-online { background: #28a745; }
.status-offline { background: #dc3545; }
.status-warning { background: #ffc107; }
.refresh-time {
text-align: center;
color: #666;
font-size: 0.9em;
margin-top: 20px;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🚀 인페인팅 서버 모니터링</h1>
<p>실시간 서버 상태 및 성능 모니터링 대시보드</p>
</div>
<!-- 시스템 개요 -->
<div class="grid">
<div class="card">
<h3>🖥️ 시스템 정보</h3>
<div class="metric">
<span class="metric-label">시스템 타입:</span>
<span class="metric-value" id="system-type">-</span>
</div>
<div class="metric">
<span class="metric-label">CPU 사용률:</span>
<span class="metric-value" id="cpu-usage">-</span>
</div>
<div class="metric">
<span class="metric-label">시스템 메모리:</span>
<span class="metric-value" id="system-memory">-</span>
</div>
<div class="metric">
<span class="metric-label">프로세스 수:</span>
<span class="metric-value" id="process-count">-</span>
</div>
</div>
<div class="card">
<h3>🎮 GPU 상태</h3>
<div class="metric">
<span class="metric-label">GPU 메모리:</span>
<span class="metric-value" id="gpu-memory">-</span>
</div>
<div class="metric">
<span class="metric-label">GPU 사용률:</span>
<span class="metric-value" id="gpu-util">-</span>
</div>
<div class="metric">
<span class="metric-label">GPU 온도:</span>
<span class="metric-value" id="gpu-temp">-</span>
</div>
<div class="metric">
<span class="metric-label">GPU 클럭:</span>
<span class="metric-value" id="gpu-clock">-</span>
</div>
</div>
<div class="card">
<h3>⚙️ 워커 상태</h3>
<div class="metric">
<span class="metric-label">활성 워커:</span>
<span class="metric-value" id="worker-count">-</span>
</div>
<div class="metric">
<span class="metric-label">세션 풀:</span>
<span class="metric-value" id="session-pool">-</span>
</div>
<div class="metric">
<span class="metric-label">대기열:</span>
<span class="metric-value" id="queue-size">-</span>
</div>
<div class="metric">
<span class="metric-label">상태:</span>
<span class="metric-value" id="worker-status">-</span>
</div>
</div>
<div class="card">
<h3>📊 API 통계</h3>
<div class="metric">
<span class="metric-label">총 요청:</span>
<span class="metric-value" id="total-requests">-</span>
</div>
<div class="metric">
<span class="metric-label">성공률:</span>
<span class="metric-value" id="success-rate">-</span>
</div>
<div class="metric">
<span class="metric-label">평균 응답시간:</span>
<span class="metric-value" id="avg-response-time">-</span>
</div>
<div class="metric">
<span class="metric-label">에러 수:</span>
<span class="metric-value" id="error-count">-</span>
</div>
</div>
</div>
<!-- 시스템 성능 상세 -->
<div class="card">
<h3>🔍 시스템 성능 상세</h3>
<div class="system-info">
<div>
<h4>CPU 정보</h4>
<div class="metric">
<span class="metric-label">코어 수:</span>
<span class="metric-value" id="cpu-count">-</span>
</div>
<div class="metric">
<span class="metric-label">클럭 속도:</span>
<span class="metric-value" id="cpu-freq">-</span>
</div>
<div class="metric">
<span class="metric-label">부하 평균 (1분):</span>
<span class="metric-value" id="load-1min">-</span>
</div>
<div class="metric">
<span class="metric-label">부하 평균 (5분):</span>
<span class="metric-value" id="load-5min">-</span>
</div>
</div>
<div>
<h4>디스크 I/O</h4>
<div class="metric">
<span class="metric-label">읽기 (MB/s):</span>
<span class="metric-value" id="disk-read">-</span>
</div>
<div class="metric">
<span class="metric-label">쓰기 (MB/s):</span>
<span class="metric-value" id="disk-write">-</span>
</div>
<div class="metric">
<span class="metric-label">읽기 횟수:</span>
<span class="metric-value" id="disk-read-count">-</span>
</div>
<div class="metric">
<span class="metric-label">쓰기 횟수:</span>
<span class="metric-value" id="disk-write-count">-</span>
</div>
</div>
<div>
<h4>네트워크 I/O</h4>
<div class="metric">
<span class="metric-label">송신 (MB):</span>
<span class="metric-value" id="net-sent">-</span>
</div>
<div class="metric">
<span class="metric-label">수신 (MB):</span>
<span class="metric-value" id="net-recv">-</span>
</div>
<div class="metric">
<span class="metric-label">송신 패킷:</span>
<span class="metric-value" id="net-sent-pkts">-</span>
</div>
<div class="metric">
<span class="metric-label">수신 패킷:</span>
<span class="metric-value" id="net-recv-pkts">-</span>
</div>
</div>
</div>
</div>
<!-- 엔드포인트 사용량 -->
<div class="card">
<h3>🌐 엔드포인트 사용량</h3>
<div class="endpoint-usage" id="endpoint-usage">
<div class="endpoint-item">
<div class="endpoint-name">로딩 중...</div>
<div class="endpoint-count">-</div>
</div>
</div>
</div>
<!-- 알림 및 경고 -->
<div class="alerts">
<h3>⚠️ 알림 및 경고</h3>
<div id="alerts-container">
<div class="alert info">
<strong>정보:</strong> 모니터링 데이터를 수집 중입니다...
</div>
</div>
</div>
<!-- 차트 -->
<div class="chart-container">
<h3>📈 실시간 성능 차트</h3>
<canvas id="performanceChart" width="400" height="200"></canvas>
</div>
<div class="chart-container">
<h3>🎯 GPU 메모리 사용량</h3>
<canvas id="gpuChart" width="400" height="200"></canvas>
</div>
<div class="refresh-time">
마지막 업데이트: <span id="last-update">-</span>
</div>
</div>
<script>
// Chart setup: two Chart.js line charts that updateCharts() appends to.
const performanceCtx = document.getElementById('performanceChart').getContext('2d');
const gpuCtx = document.getElementById('gpuChart').getContext('2d');
// Performance chart: dataset 0 is GPU utilization, dataset 1 is system
// memory usage; both are percentages, so the y axis is fixed to 0-100.
const performanceChart = new Chart(performanceCtx, {
    type: 'line',
    data: {
        labels: [],
        datasets: [{
            label: 'GPU 사용률 (%)',
            data: [],
            borderColor: 'rgb(75, 192, 192)',
            tension: 0.1
        }, {
            label: '시스템 메모리 (%)',
            data: [],
            borderColor: 'rgb(255, 99, 132)',
            tension: 0.1
        }]
    },
    options: {
        responsive: true,
        scales: {
            y: {
                beginAtZero: true,
                max: 100
            }
        }
    }
});
// GPU chart: a single dataset holding GPU memory usage in percent.
const gpuChart = new Chart(gpuCtx, {
    type: 'line',
    data: {
        labels: [],
        datasets: [{
            label: 'GPU 메모리 사용률 (%)',
            data: [],
            borderColor: 'rgb(54, 162, 235)',
            backgroundColor: 'rgba(54, 162, 235, 0.1)',
            tension: 0.1
        }]
    },
    options: {
        responsive: true,
        scales: {
            y: {
                beginAtZero: true,
                max: 100
            }
        }
    }
});
// WebSocket connection. Use wss:// when the page itself was loaded over
// HTTPS, because browsers block insecure ws:// connections from secure pages.
const wsScheme = window.location.protocol === 'https:' ? 'wss' : 'ws';
const ws = new WebSocket(`${wsScheme}://${window.location.host}/ws`);
ws.onmessage = function(event) {
    // A malformed frame or a rendering error must not stop later updates,
    // so report it and wait for the next message.
    try {
        updateDashboard(JSON.parse(event.data));
    } catch (err) {
        console.error('Failed to process monitoring update:', err);
    }
};
ws.onclose = function() {
    console.log('WebSocket 연결이 종료되었습니다.');
    // Reload after five seconds, which also re-opens the socket.
    setTimeout(() => {
        location.reload();
    }, 5000);
};
// Formats a numeric metric with a fixed number of decimals and a unit
// suffix. Returns '-' when the value is missing or not a finite number, so
// the page never shows "undefined%" or "NaN".
function formatMetric(value, digits, suffix) {
    if (typeof value !== 'number' || !Number.isFinite(value)) {
        return '-';
    }
    return value.toFixed(digits) + (suffix || '');
}

// Writes a value into the element with the given id. Only null/undefined
// fall back to '-', so a legitimate 0 (e.g. zero active workers) is shown.
function setMetric(id, value) {
    const el = document.getElementById(id);
    if (el) {
        el.textContent = value ?? '-';
    }
}

// Renders one monitoring snapshot (the JSON object sent over the WebSocket)
// into the metric cards, endpoint list, alert list and charts.
function updateDashboard(data) {
    const perf = data.system_performance;
    const workers = data.workers;
    const apiStats = data.api_stats;
    const bytesPerMegabyte = 1024 * 1024;

    // Basic metrics
    setMetric('system-type', data.system_type);
    setMetric('cpu-usage', formatMetric(perf?.cpu?.usage_percent, 1, '%'));
    setMetric('system-memory', formatMetric(data.system_memory?.usage_percent, 1, '%'));
    setMetric('process-count', perf?.processes);
    setMetric('gpu-memory', formatMetric(data.gpu?.usage_percent, 1, '%'));
    setMetric('gpu-util', formatMetric(data.gpu?.utilization, 1, '%'));
    setMetric('gpu-temp', formatMetric(data.jetson?.temperature?.gpu, 1, '°C'));
    setMetric('gpu-clock', formatMetric(data.jetson?.gpu_frequency, 0, 'MHz'));
    setMetric('worker-count', workers?.active_workers);
    setMetric('session-pool', data.sessions?.total_sessions);
    setMetric('queue-size', workers?.queue_size);
    setMetric('worker-status', workers?.status);

    // API statistics
    setMetric('total-requests', apiStats?.total_requests);
    setMetric('success-rate', (apiStats?.success_rate?.toFixed(1) || '0') + '%');
    setMetric('avg-response-time', (apiStats?.average_response_time?.toFixed(2) || '0') + 'ms');
    setMetric('error-count', apiStats?.failed_requests);

    // System performance details
    if (perf?.cpu) {
        // load_average is an object keyed by '1min' / '5min' / '15min', not a
        // number; calling toFixed on the object itself throws a TypeError.
        const load = perf.cpu.load_average;
        setMetric('cpu-count', perf.cpu.count);
        setMetric('cpu-freq', (perf.cpu.frequency_mhz?.toFixed(0) || '0') + 'MHz');
        setMetric('load-1min', formatMetric(load?.['1min'], 2, ''));
        setMetric('load-5min', formatMetric(load?.['5min'], 2, ''));
    }
    if (perf?.disk) {
        // NOTE(review): the server sends the raw psutil byte counters, which
        // look like running totals rather than rates, although the page
        // labels these fields "MB/s" - confirm and adjust the label.
        setMetric('disk-read', formatMetric(perf.disk.read_bytes / bytesPerMegabyte, 2, ''));
        setMetric('disk-write', formatMetric(perf.disk.write_bytes / bytesPerMegabyte, 2, ''));
        setMetric('disk-read-count', perf.disk.read_count);
        setMetric('disk-write-count', perf.disk.write_count);
    }
    if (perf?.network) {
        setMetric('net-sent', formatMetric(perf.network.bytes_sent / bytesPerMegabyte, 2, ''));
        setMetric('net-recv', formatMetric(perf.network.bytes_recv / bytesPerMegabyte, 2, ''));
        setMetric('net-sent-pkts', perf.network.packets_sent);
        setMetric('net-recv-pkts', perf.network.packets_recv);
    }

    // Endpoint usage
    updateEndpointUsage(apiStats?.endpoint_usage || {});
    // Alerts
    updateAlerts(data.alerts || []);
    // Charts
    updateCharts(data);
    // Time of the last update (browser clock)
    setMetric('last-update', new Date().toLocaleTimeString());
}
// Rebuilds the endpoint usage grid from an { endpoint: count } object.
// Names and counts are written with textContent rather than innerHTML so an
// endpoint name containing markup is shown as text and never parsed as HTML.
function updateEndpointUsage(endpointUsage) {
    const container = document.getElementById('endpoint-usage');
    container.innerHTML = '';
    const entries = Object.entries(endpointUsage);
    if (entries.length === 0) {
        // Static placeholder markup; contains no dynamic data.
        container.innerHTML = '<div class="endpoint-item"><div class="endpoint-name">사용량 없음</div><div class="endpoint-count">-</div></div>';
        return;
    }
    entries.forEach(([endpoint, count]) => {
        const nameEl = document.createElement('div');
        nameEl.className = 'endpoint-name';
        nameEl.textContent = endpoint;

        const countEl = document.createElement('div');
        countEl.className = 'endpoint-count';
        countEl.textContent = count;

        const item = document.createElement('div');
        item.className = 'endpoint-item';
        item.appendChild(nameEl);
        item.appendChild(countEl);
        container.appendChild(item);
    });
}
// Rebuilds the alert list from an array of { level, message, timestamp }
// objects. All dynamic text is inserted with textContent so alert messages
// are never interpreted as HTML.
function updateAlerts(alerts) {
    const container = document.getElementById('alerts-container');
    container.innerHTML = '';
    if (alerts.length === 0) {
        // Static placeholder markup; contains no dynamic data.
        container.innerHTML = '<div class="alert info"><strong>정보:</strong> 현재 알림이 없습니다.</div>';
        return;
    }
    alerts.forEach(entry => {
        // Fall back to the 'info' style when an alert carries no level,
        // instead of throwing on toUpperCase().
        const level = String(entry.level || 'info');

        const levelEl = document.createElement('strong');
        levelEl.textContent = `${level.toUpperCase()}:`;

        const timeEl = document.createElement('small');
        timeEl.textContent = new Date(entry.timestamp).toLocaleString();

        const alertDiv = document.createElement('div');
        alertDiv.className = `alert ${level}`;
        alertDiv.appendChild(levelEl);
        alertDiv.appendChild(document.createTextNode(` ${entry.message}`));
        alertDiv.appendChild(document.createElement('br'));
        alertDiv.appendChild(timeEl);
        container.appendChild(alertDiv);
    });
}
// Appends the newest sample to both charts and keeps only the most recent
// 20 points in each, then redraws them.
function updateCharts(data) {
    const label = new Date().toLocaleTimeString();
    const maxPoints = 20;

    // Pushes one label plus one value per listed dataset, then drops the
    // oldest point once the chart holds more than maxPoints labels.
    const appendSample = (chart, values) => {
        chart.data.labels.push(label);
        values.forEach((value, index) => {
            chart.data.datasets[index].data.push(value);
        });
        if (chart.data.labels.length > maxPoints) {
            chart.data.labels.shift();
            values.forEach((unused, index) => {
                chart.data.datasets[index].data.shift();
            });
        }
    };

    // Performance chart: GPU utilization and system memory usage.
    appendSample(performanceChart, [
        data.gpu?.utilization || 0,
        data.system_memory?.usage_percent || 0
    ]);
    // GPU chart: GPU memory usage.
    appendSample(gpuChart, [data.gpu?.usage_percent || 0]);

    performanceChart.update();
    gpuChart.update();
}
</script>
</body>
</html>
"""
@monitor_app.get("/")
async def dashboard():
    """Serve the dashboard HTML page."""
    page = HTMLResponse(content=HTML_TEMPLATE)
    return page
@monitor_app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Accept a dashboard WebSocket and keep it registered while it is open.

    The socket is added to ``connected_clients`` so the broadcaster can push
    snapshots to it. Incoming messages are read only to detect disconnection.
    """
    await websocket.accept()
    connected_clients.append(websocket)
    try:
        while True:
            # Wait for a client message; its content is ignored.
            await websocket.receive_text()
    except WebSocketDisconnect:
        logger.info("클라이언트 연결 해제")
    finally:
        # Unregister on every exit path, not only on a clean disconnect. The
        # broadcaster may already have removed this socket after a failed
        # send, so check membership to avoid a ValueError.
        if websocket in connected_clients:
            connected_clients.remove(websocket)
@monitor_app.get("/api/status")
async def get_current_status():
    """Collect a fresh snapshot and return it as the JSON response."""
    snapshot = await monitoring_data.collect_data()
    return snapshot
@monitor_app.get("/api/history")
async def get_history():
    """Return the stored snapshots together with their summary statistics."""
    snapshots = monitoring_data.get_history()
    summary = monitoring_data.get_statistics()
    return {"history": snapshots, "statistics": summary}
async def broadcast_data():
    """Push a fresh snapshot to every connected client, forever.

    Runs until cancelled. A snapshot is collected only while at least one
    client is connected. Clients whose send fails are dropped from
    ``connected_clients``.
    """
    while True:
        try:
            if connected_clients:
                data = await monitoring_data.collect_data()
                message = json.dumps(data, ensure_ascii=False)
                # Iterate over a copy: sockets can connect or disconnect
                # while this coroutine is suspended in send_text().
                for client in list(connected_clients):
                    try:
                        await client.send_text(message)
                    except Exception:
                        # Treat any send failure as a dead connection. The
                        # socket's own handler may have removed it already.
                        if client in connected_clients:
                            connected_clients.remove(client)
            await asyncio.sleep(2)  # update every 2 seconds
        except asyncio.CancelledError:
            # Let cancellation through so the task can be stopped. On Python
            # 3.7 CancelledError still derives from Exception.
            raise
        except Exception as e:
            logger.error(f"브로드캐스트 오류: {e}")
            await asyncio.sleep(5)
@monitor_app.on_event("startup")
async def start_monitoring():
    """Start the background broadcaster when the app starts."""
    logger.info("모니터링 대시보드 시작")
    # Jetson optimisation (at startup)
    if settings.IS_JETSON:
        logger.info("Jetson Xavier 모드로 모니터링 시작")
        gpu_monitor.optimize_for_jetson()
    # Keep a strong reference to the task: the event loop only holds weak
    # references, so an unreferenced task can be garbage-collected mid-run.
    monitor_app.state.broadcast_task = asyncio.create_task(broadcast_data())
if __name__ == "__main__":
    # Configure root logging before the server starts.
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    # Run the monitoring server; uvicorn imports the app from this path.
    uvicorn.run(
        "app.monitoring.dashboard:monitor_app",
        host="0.0.0.0",
        port=settings.MONITORING_PORT,
        log_level="info",
    )