"""GPU memory monitoring utilities.

Supports both Jetson Xavier and x86 systems.
"""
import asyncio
import logging
import os
import re
import shutil
import subprocess
from typing import Any, Dict, List, Optional

import psutil

# Defined before the optional imports so that their failure paths can log
# through this module's logger. The module-level ``logging.warning`` helper
# would implicitly configure the root logger as an import side effect.
logger = logging.getLogger(__name__)

# jtop (from the jetson-stats package) is optional; without it JetsonMonitor
# relies on its sysfs / tegrastats / psutil fallbacks.
try:
    import jtop
    JTOP_AVAILABLE = True
except ImportError:
    JTOP_AVAILABLE = False
    logger.warning("jtop library not found. Jetson monitoring will be limited. Please run 'sudo pip install jetson-stats'")

# pynvml is optional; without it GPUMonitor reports zeroed values on
# non-Jetson hosts.
try:
    import pynvml
    NVML_AVAILABLE = True
except ImportError:
    NVML_AVAILABLE = False
    logger.warning("pynvml not available. GPU monitoring will be limited.")

from ..core.config import settings


class JetsonMonitor:
    """Collect memory, utilization, clock, thermal and power data on a Jetson Xavier.

    jtop is used when the library imported and started successfully.
    Otherwise the methods fall back to sysfs/debugfs files, the
    ``tegrastats`` command and finally ``psutil``.

    NOTE(review): the memory sources do not share a unit. The debugfs,
    tegrastats and device-tree fallbacks report GB, the psutil fallback
    reports MB, and the jtop path passes jtop's numbers through unchanged.
    Callers have to normalise the result themselves.
    """

    # Total assumed when no source yields a real figure (8 GB unified memory).
    _DEFAULT_TOTAL_GB = 8.0

    # tegrastats streams forever, so `timeout 3` stops it after three seconds.
    _TEGRASTATS_CMD = ("timeout", "3", "tegrastats", "--interval", "500")
    # 124 is the exit status `timeout` reports when it had to stop the child;
    # output captured up to that point is still usable.
    _TEGRASTATS_OK_CODES = (0, 124)

    # Matches e.g. "RAM 1024/8192MB" (used/total).
    _RAM_PATTERN = re.compile(r"RAM\s+(\d+(?:\.\d+)?)/(\d+(?:\.\d+)?)MB")
    # Matches e.g. "GR3D_FREQ 45%".
    _GR3D_PATTERN = re.compile(r'GR3D_FREQ\s+(\d+(?:\.\d+)?)%')
    _NUMBER_PATTERN = re.compile(r'\d+')

    _SYSFS_GPU_CLASS_DIR = "/sys/class/nvidia-gpu"
    _SYSFS_MEMORY_FILES = ("total_memory", "memory_used", "memory_free", "memory_usage")
    _DEBUGFS_MEMORY_PATH = "/sys/kernel/debug/gpu/memory"
    _GV11B_DEVICETREE_PATH = "/sys/firmware/devicetree/base/gv11b"
    _GPU_LOAD_PATHS = (
        "/sys/devices/gpu.0/load",
        "/sys/kernel/debug/gpu/load",
        "/sys/class/devfreq/17000000.gv11b/load",
    )
    _POWER_SENSOR_PATH = "/sys/bus/i2c/devices/1-0040/iio_device/in_power0_input"
    _FAN_PWM_PATH = "/sys/devices/pwm-fan/target_pwm"
    _POWER_MODES = ("MAXN", "5W", "10W", "15W")

    def __init__(self):
        """Record the sysfs locations and start a jtop client when available."""
        self.jetson_clocks_path = "/sys/kernel/debug/clk"
        self.jetson_thermal_path = "/sys/devices/virtual/thermal"
        self.jetson_power_path = "/sys/kernel/debug/tegra_pcie/pcie_power"
        self._jtop = None
        if JTOP_AVAILABLE:
            try:
                self._jtop = jtop.jtop()
                self._jtop.start()
            except Exception as e:
                logger.error(f"Failed to initialize jtop: {e}")
                self._jtop = None

    def __del__(self):
        """Close the jtop client, if one was started."""
        # getattr: __del__ also runs on instances whose __init__ never got as
        # far as assigning _jtop.
        client = getattr(self, "_jtop", None)
        if client and client.ok:
            try:
                client.close()
            except Exception:
                # Deliberately silent: finalizers may run during interpreter
                # shutdown, when logging is no longer dependable.
                pass

    # ------------------------------------------------------------------
    # Memory
    # ------------------------------------------------------------------

    def get_gpu_memory_info(self) -> Dict[str, float]:
        """Return memory usage from the first source that provides it.

        Sources are tried in this order: jtop, the debugfs GPU memory file,
        tegrastats, a fixed 8 GB answer when the GV11B device-tree node
        exists, and finally ``psutil.virtual_memory()``.

        Returns:
            A dict with ``total``, ``used``, ``free``, ``usage_percent`` and
            ``free_ratio``. Units differ per source (see the class
            docstring). All values are zero if the fallback chain itself
            raises.
        """
        report = self._memory_from_jtop()
        if report is not None:
            return report

        try:
            # 1. Debug-only probe of the nvidia-gpu sysfs class.
            self._probe_sysfs_gpu_class()

            # 2. debugfs summary (usually needs elevated permissions).
            report = self._memory_from_debugfs()
            if report is not None:
                return report

            # 3. tegrastats.
            if self._tegrastats_available():
                return self._get_memory_from_tegrastats()

            # 4. GV11B (Jetson Xavier) detected via the device tree; report
            #    the fixed unified-memory size with nothing in use.
            if os.path.exists(self._GV11B_DEVICETREE_PATH):
                logger.info("GV11B GPU (Jetson Xavier) 감지됨")
                return self._gb_report(self._DEFAULT_TOTAL_GB, 0.0)

            # 5. Last resort: system memory via psutil (values in MB).
            logger.warning("GPU 메모리 정보를 가져올 수 없습니다. 기본값을 사용합니다.")
            mem = psutil.virtual_memory()
            return {
                "total": mem.total / 1024**2,
                "used": mem.used / 1024**2,
                "free": mem.available / 1024**2,
                "usage_percent": mem.percent,
                "free_ratio": mem.available / mem.total,
            }
        except Exception as e:
            logger.error(f"Jetson GPU memory read failed: {e}")
            return {"total": 0, "used": 0, "free": 0, "usage_percent": 0, "free_ratio": 0}

    @staticmethod
    def _gb_report(total_gb: float, used_gb: float) -> Dict[str, float]:
        """Build the standard memory dict from a total/used pair given in GB."""
        free_gb = total_gb - used_gb
        has_total = total_gb > 0
        return {
            "total": round(total_gb, 2),
            "used": round(used_gb, 2),
            "free": round(free_gb, 2),
            "usage_percent": round((used_gb / total_gb) * 100, 2) if has_total else 0,
            "free_ratio": round(free_gb / total_gb, 4) if has_total else 0,
        }

    def _memory_from_jtop(self) -> Optional[Dict[str, float]]:
        """Return memory figures from jtop, or None if jtop cannot supply them."""
        client = getattr(self, "_jtop", None)
        # NOTE(review): `ok` is read as an attribute, exactly as before. If the
        # installed jtop exposes `ok` as a method, this is always truthy.
        # Confirm against the jetson-stats API.
        if not (client and client.ok):
            return None
        try:
            # NOTE(review): assumes stats['RAM'] is a mapping with 'tot' and
            # 'use' entries. Any other shape raises below and is reported as
            # a warning, after which the fallbacks run.
            ram = client.stats.get('RAM', {})
            total = ram.get('tot', 0)
            used = ram.get('use', 0)
            if total > 0:
                return {
                    "total": total,
                    "used": used,
                    "free": total - used,
                    "usage_percent": (used / total) * 100,
                    "free_ratio": (total - used) / total,
                }
        except Exception as e:
            logger.warning(f"Failed to get memory info from jtop: {e}")
        return None

    def _probe_sysfs_gpu_class(self) -> None:
        """Log whatever memory files the nvidia-gpu sysfs class exposes.

        NOTE(review): this step has only ever produced debug output. The
        values it reads were never accumulated, so it cannot answer the
        query. Parsing them needs the file format and units confirmed first.
        """
        class_dir = self._SYSFS_GPU_CLASS_DIR
        if not os.path.exists(class_dir):
            return
        try:
            devices = [d for d in os.listdir(class_dir) if d.startswith("nvidia")]
            if not devices:
                return
            logger.debug(f"Jetson GPU devices found: {devices}")
            for device in devices:
                for mem_file in self._SYSFS_MEMORY_FILES:
                    file_path = f"{class_dir}/{device}/{mem_file}"
                    if not os.path.exists(file_path):
                        continue
                    try:
                        with open(file_path, "r") as f:
                            value = f.read().strip()
                    except (OSError, ValueError):
                        # Unreadable or undecodable entry: skip it.
                        continue
                    logger.debug(f"{mem_file}: {value}")
        except Exception as e:
            logger.debug(f"Jetson GPU class read failed: {e}")

    def _memory_from_debugfs(self) -> Optional[Dict[str, float]]:
        """Parse the debugfs GPU memory summary; None when unavailable."""
        if not os.path.exists(self._DEBUGFS_MEMORY_PATH):
            return None

        def second_to_last_number(text):
            # Lines are expected to end in "<value> MB".
            try:
                return float(text.split()[-2])
            except (ValueError, IndexError):
                return None

        try:
            with open(self._DEBUGFS_MEMORY_PATH, "r") as f:
                content = f.read()
            logger.debug(f"GPU memory debug info: {content}")

            total_mb = 0
            used_mb = 0
            for line in content.split('\n'):
                if "MB" not in line:
                    continue
                if "Total" in line:
                    parsed = second_to_last_number(line)
                    if parsed is not None:
                        total_mb = parsed
                elif "Used" in line:
                    parsed = second_to_last_number(line)
                    if parsed is not None:
                        used_mb = parsed

            if total_mb > 0:
                return self._gb_report(total_mb / 1024, used_mb / 1024)
        except Exception as e:
            logger.debug(f"GPU memory debug read failed: {e}")
        return None

    # ------------------------------------------------------------------
    # tegrastats
    # ------------------------------------------------------------------

    def _tegrastats_available(self) -> bool:
        """Return True when a ``tegrastats`` executable is on PATH."""
        return shutil.which("tegrastats") is not None

    def _run_tegrastats(self) -> Optional[str]:
        """Sample tegrastats briefly and return its stdout.

        Returns None when the command exits with an unexpected status.
        Errors from ``subprocess.run`` (including its own 10 s timeout)
        propagate to the caller.
        """
        result = subprocess.run(list(self._TEGRASTATS_CMD),
                                capture_output=True, text=True, timeout=10)
        if result.returncode in self._TEGRASTATS_OK_CODES:
            return result.stdout
        return None

    @classmethod
    def _parse_tegrastats_ram(cls, output: str):
        """Return ``(used_mb, total_mb)`` from the last line reporting RAM.

        Returns None when no line contains a ``RAM <used>/<total>MB`` field.
        """
        found = None
        for line in output.split('\n'):
            match = cls._RAM_PATTERN.search(line)
            if match:
                found = (float(match.group(1)), float(match.group(2)))
        return found

    @classmethod
    def _parse_gr3d_percent(cls, output: str) -> Optional[float]:
        """Return the first ``GR3D_FREQ <n>%`` value in *output*, or None."""
        for line in output.split('\n'):
            match = cls._GR3D_PATTERN.search(line)
            if match:
                usage = float(match.group(1))
                logger.debug(f"Parsed GPU utilization: {usage}%")
                return usage
        return None

    def _get_memory_from_tegrastats(self) -> Dict[str, float]:
        """Return RAM usage in GB as reported by tegrastats.

        Falls back to ``_DEFAULT_TOTAL_GB`` with nothing in use when the
        command fails or its output has no RAM field.
        """
        try:
            output = self._run_tegrastats()
            if output is not None:
                total_gb = self._DEFAULT_TOTAL_GB
                used_gb = 0.0
                parsed = self._parse_tegrastats_ram(output)
                if parsed is not None:
                    used_mb, total_mb = parsed
                    used_gb = used_mb / 1024
                    total_gb = total_mb / 1024
                return self._gb_report(total_gb, used_gb)
        except Exception as e:
            logger.debug(f"tegrastats parsing failed: {e}")

        return self._gb_report(self._DEFAULT_TOTAL_GB, 0.0)

    # ------------------------------------------------------------------
    # Utilization
    # ------------------------------------------------------------------

    def get_gpu_utilization(self) -> float:
        """Return GPU utilization as a percentage, capped at 100.

        tegrastats' ``GR3D_FREQ`` field is preferred and the sysfs load files
        are the fallback. Returns 0.0 when neither source yields a value or
        an unexpected error occurs.
        """
        try:
            if self._tegrastats_available():
                output = self._run_tegrastats()
                if output is not None:
                    logger.debug(f"tegrastats output: {output}")
                    usage = self._parse_gr3d_percent(output)
                    if usage is not None:
                        return min(usage, 100.0)

            load = self._read_gpu_load()
            if load is not None:
                return min(load, 100.0)

            logger.debug("No GPU utilization found, returning 0.0")
            return 0.0
        except Exception as e:
            logger.debug(f"Jetson GPU utilization read failed: {e}")
            return 0.0

    def _read_gpu_load(self) -> Optional[float]:
        """Return the first number found in the first readable load file.

        NOTE(review): the value is used as a percentage as-is. Some Jetson
        kernels may report this file in tenths of a percent. Confirm the
        scale on the target board.
        """
        for load_path in self._GPU_LOAD_PATHS:
            if not os.path.exists(load_path):
                continue
            try:
                with open(load_path, "r") as f:
                    load_str = f.read().strip()
                numbers = self._NUMBER_PATTERN.findall(load_str)
                if numbers:
                    load = float(numbers[0])
                    logger.debug(f"GPU load from {load_path}: {load}")
                    return load
            except Exception as e:
                logger.debug(f"Failed to read {load_path}: {e}")
        return None

    # ------------------------------------------------------------------
    # Clocks
    # ------------------------------------------------------------------

    def _read_clock_mhz(self, clock_name: str, label: str) -> Optional[int]:
        """Read ``<clocks>/<clock_name>/clk_rate`` (Hz) and return whole MHz."""
        rate_path = f"{self.jetson_clocks_path}/{clock_name}/clk_rate"
        try:
            if os.path.exists(rate_path):
                with open(rate_path, "r") as f:
                    return int(f.read().strip()) // 1000000  # Hz to MHz
        except Exception as e:
            logger.debug(f"{label} frequency read failed: {e}")
        return None

    def get_gpu_frequency(self) -> Optional[int]:
        """Return the GPU clock in MHz, or None when it cannot be read."""
        return self._read_clock_mhz("gpcclk", "GPU")

    def get_cpu_frequency(self) -> Optional[int]:
        """Return the CPU clock in MHz, or None when it cannot be read."""
        return self._read_clock_mhz("cpu_gpcclk", "CPU")

    def get_memory_frequency(self) -> Optional[int]:
        """Return the memory (EMC) clock in MHz, or None when it cannot be read."""
        return self._read_clock_mhz("emc", "Memory")

    # ------------------------------------------------------------------
    # Thermal / power
    # ------------------------------------------------------------------

    def get_temperature(self) -> Dict[str, float]:
        """Return the temperature in degrees C of every readable thermal zone.

        Keys keep the established ``zone_<suffix>`` form, where ``<suffix>``
        is the text after the last underscore of the directory name.
        Each zone is read independently, so one unreadable or malformed zone
        does not hide the others.
        """
        temps = {}
        try:
            if not os.path.exists(self.jetson_thermal_path):
                return temps
            zones = sorted(
                item for item in os.listdir(self.jetson_thermal_path)
                if item.startswith("thermal_zone")
            )
        except Exception as e:
            logger.debug(f"Temperature read failed: {e}")
            return temps

        for item in zones:
            temp_file = f"{self.jetson_thermal_path}/{item}/temp"
            try:
                if not os.path.exists(temp_file):
                    continue
                with open(temp_file, "r") as f:
                    temp = int(f.read().strip()) / 1000.0  # mC to C
            except Exception as e:
                logger.debug(f"Temperature read failed: {e}")
                continue
            temps[f"zone_{item.split('_')[-1]}"] = temp
        return temps

    def get_power_consumption(self) -> Optional[float]:
        """Return power draw in watts from the power sensor, or None."""
        try:
            if os.path.exists(self._POWER_SENSOR_PATH):
                with open(self._POWER_SENSOR_PATH, "r") as f:
                    return float(f.read().strip()) / 1000.0  # mW to W
        except Exception as e:
            logger.debug(f"Power consumption read failed: {e}")
        return None

    def set_power_mode(self, mode: str) -> bool:
        """Switch the power mode via ``sudo nvpmodel -m <mode>``.

        Args:
            mode: One of "MAXN", "5W", "10W" or "15W".

        Returns:
            True when nvpmodel exits with status 0. False for an invalid
            mode, a non-zero exit status or any error.

        NOTE(review): the mode name is passed to ``-m`` verbatim. Confirm
        that nvpmodel on the target accepts names rather than numeric IDs.
        """
        try:
            if mode not in self._POWER_MODES:
                logger.error(f"Invalid power mode: {mode}")
                return False
            result = subprocess.run(
                ["sudo", "nvpmodel", "-m", mode],
                capture_output=True,
                text=True,
                timeout=10,
            )
            if result.returncode == 0:
                logger.info(f"Power mode set to {mode}")
                return True
            logger.warning(f"Failed to set power mode: {result.stderr}")
        except Exception as e:
            logger.error(f"Power mode setting failed: {e}")
        return False

    def set_fan_speed(self, speed: int) -> bool:
        """Write *speed* (0-255) to the PWM fan control file.

        Returns:
            True when the value was written. False when *speed* is out of
            range, fan control is unavailable or the write fails.
        """
        try:
            if not 0 <= speed <= 255:
                logger.error(f"Invalid fan speed: {speed}")
                return False
            if not os.path.exists(self._FAN_PWM_PATH):
                logger.warning("Fan control not available")
                return False
            with open(self._FAN_PWM_PATH, "w") as f:
                f.write(str(speed))
            logger.info(f"Fan speed set to {speed}")
            return True
        except Exception as e:
            logger.error(f"Fan speed setting failed: {e}")
        return False

    def get_jetson_info(self) -> Dict[str, Any]:
        """Return clocks, temperatures, power draw and power mode in one dict."""
        return {
            "gpu_frequency": self.get_gpu_frequency(),
            "cpu_frequency": self.get_cpu_frequency(),
            "memory_frequency": self.get_memory_frequency(),
            "temperature": self.get_temperature(),
            "power_consumption": self.get_power_consumption(),
            "power_mode": self._get_current_power_mode(),
        }

    def _get_current_power_mode(self) -> str:
        """Return the mode named by ``nvpmodel -q``, or "Unknown"."""
        try:
            result = subprocess.run(
                ["nvpmodel", "-q"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode == 0:
                for line in result.stdout.split('\n'):
                    if 'NV Power Mode:' in line:
                        return line.split(':')[-1].strip()
        except Exception as e:
            # nvpmodel missing or not runnable: report "Unknown" below.
            logger.debug(f"Power mode query failed: {e}")
        return "Unknown"


class GPUMonitor:
    """Platform-independent front end for GPU monitoring.

    On Jetson (``settings.IS_JETSON``) every query is delegated to a
    ``JetsonMonitor``. Elsewhere NVML is used when ``pynvml`` imported and
    initialised successfully. If neither applies, queries return zeroed
    values.
    """

    # PWM duty values written by optimize_for_jetson (valid range 0-255).
    _FAN_PWM_HIGH = 255
    _FAN_PWM_MEDIUM = 128
    _FAN_PWM_LOW = 64
    # Above this temperature (degrees C) the fan runs at least at medium speed.
    _FAN_MEDIUM_TEMP_C = 60

    def __init__(self):
        """Pick the backend for this host and initialise it."""
        self.initialized = False
        self.is_jetson = settings.IS_JETSON
        self.jetson_monitor = JetsonMonitor() if self.is_jetson else None

        if self.is_jetson:
            logger.info("Jetson Xavier mode detected - using Jetson-specific monitoring")
            self.initialized = True
        elif NVML_AVAILABLE:
            try:
                pynvml.nvmlInit()
                self.initialized = True
                logger.info("GPU monitoring initialized successfully")
            except Exception as e:
                logger.error(f"Failed to initialize GPU monitoring: {e}")

    @staticmethod
    def _empty_memory_info(unit: str) -> Dict[str, Any]:
        """Return the all-zero memory report tagged with *unit*."""
        return {"total": 0, "used": 0, "free": 0, "usage_percent": 0, "free_ratio": 0, "unit": unit}

    def get_gpu_memory_info(self, device_id: int = 0) -> Dict[str, Any]:
        """Return GPU memory usage in a uniform shape.

        Args:
            device_id: NVML device index. Ignored on Jetson.

        Returns:
            A dict with keys ``total``, ``used``, ``free``,
            ``usage_percent``, ``free_ratio`` and ``unit``. ``unit`` is
            "MiB" on Jetson and "GiB" on the NVML path. All numeric values
            are zero when no data is available.
        """
        if self.is_jetson:
            info = self.jetson_monitor.get_gpu_memory_info()
            if not info:
                return self._empty_memory_info("MiB")

            total = info.get("total", 0)
            used = info.get("used", 0)
            free = info.get("free", 0)
            # Some Jetson fallbacks answer in GB. A total below 100 is taken
            # to be GB and converted so this branch always reports MiB.
            if total and total < 100:
                total, used, free = total * 1024, used * 1024, free * 1024
            # Prefer the backend's own figures and derive only missing ones.
            usage_percent = info.get("usage_percent", (used / total * 100) if total else 0)
            free_ratio = info.get("free_ratio", (free / total) if total else 0)
            return {
                "total": total,
                "used": used,
                "free": free,
                "usage_percent": round(float(usage_percent), 2),
                "free_ratio": round(float(free_ratio), 4),
                "unit": "MiB",
            }

        if not self.initialized or not NVML_AVAILABLE:
            return self._empty_memory_info("GiB")

        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)

            total_gib = mem_info.total / 1024**3
            used_gib = mem_info.used / 1024**3
            free_gib = mem_info.free / 1024**3

            return {
                "total": round(total_gib, 2),
                "used": round(used_gib, 2),
                "free": round(free_gib, 2),
                "usage_percent": round((used_gib / total_gib) * 100 if total_gib else 0, 2),
                "free_ratio": round((free_gib / total_gib), 4) if total_gib else 0,
                "unit": "GiB",
            }
        except Exception as e:
            logger.error(f"Error getting GPU memory info: {e}")
            return self._empty_memory_info("GiB")

    def get_gpu_utilization(self, device_id: int = 0) -> float:
        """Return GPU utilization in percent; 0.0 when it cannot be read.

        Args:
            device_id: NVML device index. Ignored on Jetson.
        """
        if self.is_jetson:
            return self.jetson_monitor.get_gpu_utilization()

        if not self.initialized or not NVML_AVAILABLE:
            return 0.0

        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            return float(pynvml.nvmlDeviceGetUtilizationRates(handle).gpu)
        except Exception as e:
            logger.error(f"Error getting GPU utilization: {e}")
            return 0.0

    def get_system_memory_info(self) -> Dict[str, float]:
        """Return system RAM totals in GB plus the usage percentage."""
        mem = psutil.virtual_memory()
        gib = 1024**3
        return {
            "total": round(mem.total / gib, 2),  # GB
            "used": round(mem.used / gib, 2),  # GB
            "free": round(mem.free / gib, 2),  # GB
            "usage_percent": round(mem.percent, 2),
        }

    def get_jetson_specific_info(self) -> Dict[str, Any]:
        """Return JetsonMonitor.get_jetson_info(), or {} when not on Jetson."""
        if not self.is_jetson or not self.jetson_monitor:
            return {}
        return self.jetson_monitor.get_jetson_info()

    def should_scale_up(self, vram_usage: float, threshold: float) -> bool:
        """Return True when *vram_usage* is strictly below *threshold*."""
        return vram_usage < threshold

    def should_scale_down(self, vram_usage: float, threshold: float) -> bool:
        """Return True when *vram_usage* is strictly above *threshold*."""
        return vram_usage > threshold

    def optimize_for_jetson(self) -> bool:
        """Apply the configured power mode and a temperature-based fan speed.

        Returns:
            False when not running on Jetson or when an error occurs.
            True otherwise. The results of the individual power-mode and
            fan calls are not checked.
        """
        if not self.is_jetson or not self.jetson_monitor:
            return False

        try:
            # A configured mode of "MAXN" is left untouched; any other mode
            # is applied.
            power_mode = settings.JETSON_POWER_MODE
            if power_mode != "MAXN":
                self.jetson_monitor.set_power_mode(power_mode)

            if settings.JETSON_FAN_CONTROL:
                # Drive the fan from the hottest thermal zone.
                temps = self.jetson_monitor.get_temperature()
                max_temp = max(temps.values()) if temps else 0

                if max_temp > settings.JETSON_TEMP_THRESHOLD:
                    pwm = self._FAN_PWM_HIGH
                elif max_temp > self._FAN_MEDIUM_TEMP_C:
                    pwm = self._FAN_PWM_MEDIUM
                else:
                    pwm = self._FAN_PWM_LOW
                self.jetson_monitor.set_fan_speed(pwm)

            logger.info("Jetson optimization completed")
            return True
        except Exception as e:
            logger.error(f"Jetson optimization failed: {e}")
            return False

    def get_comprehensive_gpu_info(self) -> Dict[str, Any]:
        """Return GPU memory, utilization and system memory in one dict.

        The result also carries a ``platform`` label and, on Jetson, a
        ``jetson`` entry with the Jetson-specific details.
        """
        gpu_info = {
            "memory": self.get_gpu_memory_info(),
            "utilization": self.get_gpu_utilization(),
            "system_memory": self.get_system_memory_info(),
        }

        if self.is_jetson:
            gpu_info["jetson"] = self.get_jetson_specific_info()
            gpu_info["platform"] = "Jetson Xavier"
        else:
            gpu_info["platform"] = "x86_64"

        return gpu_info


# Global GPU monitor instance. It is created when this module is imported,
# so GPUMonitor.__init__ (backend detection and initialisation) runs then.
gpu_monitor = GPUMonitor()