# ImageProcessor_MainServer/docker-compose.worker.yml
#
# 81 lines
# 2.7 KiB
# YAML
# Raw Permalink Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Celery worker command line, defined once as an extension field and anchored
# as `celery_cmd` so that a service can reference it with `*celery_cmd`.
x-celery-cmd: &celery_cmd
  - "celery"
  - "-A"
  - "worker.celery_worker.celery_app"
  - "worker"
  # Queues this worker consumes from.
  - "-Q"
  - "celery,translate,rembg,inpaint"
  # Log level.
  - "-l"
  - "info"
  # Thread pool: tasks run in threads, the worker does not fork.
  - "--pool=threads"
  - "--concurrency=1"
  # Task-count based recycling is set to 0 here.
  # NOTE(review): the previous comment said "restart every 300 tasks", which
  # does not match the value 0 -- confirm which one is intended.
  # NOTE(review): Celery documents --max-tasks-per-child and
  # --max-memory-per-child for the prefork pool; verify whether they have any
  # effect together with --pool=threads.
  - "--max-tasks-per-child=0"
  # Memory based recycling threshold; the original note describes it as
  # "restart at about 500 MB" (Celery documents the unit as KiB -- confirm).
  - "--max-memory-per-child=500000"
  # Machine prefix for the node name; example result: 7600M@fd_test-worker-1
  - "--hostname=7600M@%h"
# Service definitions: a single GPU-backed Celery worker that runs the shared
# command anchored as `celery_cmd`.
services:
  worker:
    build:
      context: .
      dockerfile: Dockerfile.worker
    # Important: container_name is intentionally left out so that every
    # container automatically receives its own unique name.
    # container_name: celery_worker_remote   <- removed
    env_file:
      - .env.worker
    volumes:
      - ./temp_files/debug:/app/temp_files/debug  # debug artifacts
      - ./worker/modules:/app/modules:ro
      - ./worker/models:/app/models:ro
      - ./worker/trt_cache:/app/trt_cache
      - ./worker/torch_cache:/app/torch_cache
      # - ./worker/patches/simple_lama/model.py:/patches/model.py:ro
      # - ./worker/patches/patch-entrypoint.sh:/app/patch-entrypoint.sh:ro
      - /etc/localtime:/etc/localtime:ro
    runtime: nvidia  # Compose v2
    environment:
      - TZ=Asia/Seoul
      # Use every GPU (fine for a single machine, including multi-card hosts).
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - NVIDIA_REQUIRE_CUDA=cuda>=11.8
      - OCR_REC_LABEL_FILE=/app/modules/PP_Models/rec/ppocr_keys_v1.txt
      - FLAGS_use_cuda_managed_memory=true
      # Optional: limit the initial allocation and the total share of GPU memory.
      - FLAGS_initial_gpu_memory_in_mb=200  # reduce the initial pre-allocation
      - FLAGS_fraction_of_gpu_memory_to_use=0.85
      # Optional: change the allocation strategy (mitigates fragmentation).
      - FLAGS_use_stream_safe_cuda_allocator=true
      # - FLAGS_allocator_strategy=auto_growth
      # - FLAGS_use_cuda_managed_memory=false
      - DEBUG_DUMP_DIR=/app/temp_files/debug
      - DEBUG_DUMP_ENABLE=1
    # entrypoint: ["/bin/bash", "/app/patch-entrypoint.sh"]
    command: *celery_cmd
    restart: unless-stopped
    healthcheck:
      # Ping only this container's own Celery node. Without a destination,
      # `inspect ping` is broadcast to every worker on the broker and succeeds
      # as soon as any of them answers, so a dead replica would still be
      # reported healthy. The node name must stay in sync with the
      # `--hostname=7600M@%h` argument of the worker command.
      # CMD-SHELL is needed so the shell expands $HOSTNAME; `$$` keeps Compose
      # from interpolating the variable on the host.
      test:
        - "CMD-SHELL"
        - "celery -A worker.celery_worker.celery_app inspect ping -d 7600M@$$HOSTNAME"
      interval: 60s
      timeout: 10s
      retries: 3
    # Originally annotated as "applies in Swarm mode only; ignored by
    # stand-alone compose".
    # NOTE(review): Docker Compose v2 also honours parts of `deploy` (for
    # example resource limits and device reservations) outside Swarm --
    # confirm that the settings below are intended for stand-alone runs too.
    deploy:
      replicas: 1  # desired number of workers
      restart_policy:
        condition: any
      resources:
        limits:
          # Original estimate: 2 children x LaMa model loading memory.
          # NOTE(review): the worker command uses --concurrency=1 -- confirm
          # that this estimate is still accurate.
          memory: 1.5G
        reservations:
          devices:
            # GPU reservation (originally annotated as "for Swarm").
            - capabilities: ["gpu"]