# Shared Celery worker command line, referenced below via the *celery_cmd alias.
x-celery-cmd: &celery_cmd
  - "celery"
  - "-A"
  - "worker.celery_worker.celery_app"
  - "worker"
  - "-Q"
  - "celery,translate,rembg,inpaint"
  - "-l"
  - "info"
  # Thread pool: the worker does not fork child processes.
  - "--pool=threads"
  - "--concurrency=1"
  # NOTE(review): the original comment said "restart every 300 tasks", but the
  # value is 0 - confirm which one is intended.
  # NOTE(review): Celery documents the per-child recycle options as prefork-pool
  # features; they presumably have no effect with --pool=threads - verify.
  - "--max-tasks-per-child=0"
  # Intended to recycle a child once its memory use reaches roughly 500 MB.
  - "--max-memory-per-child=500000"
  # Machine prefix for the worker node name,
  # e.g. resulting name: 7600M@fd_test-worker-1
  - "--hostname=7600M@%h"

services:
  worker:
    build:
      context: .
      dockerfile: Dockerfile.worker
    # Important: container_name was removed so that each container gets a
    # unique, automatically generated name.
    # container_name: celery_worker_remote   <- removed
    env_file:
      - .env.worker
    volumes:
      - ./temp_files/debug:/app/temp_files/debug  # debug artifacts
      - ./worker/modules:/app/modules:ro
      - ./worker/models:/app/models:ro
      - ./worker/trt_cache:/app/trt_cache
      - ./worker/torch_cache:/app/torch_cache
      # - ./worker/patches/simple_lama/model.py:/patches/model.py:ro
      # - ./worker/patches/patch-entrypoint.sh:/app/patch-entrypoint.sh:ro
      - /etc/localtime:/etc/localtime:ro
    runtime: nvidia  # Compose v2
    environment:
      - TZ=Asia/Seoul
      # Use all GPUs (fine for a single machine with multiple cards).
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - NVIDIA_REQUIRE_CUDA=cuda>=11.8
      - OCR_REC_LABEL_FILE=/app/modules/PP_Models/rec/ppocr_keys_v1.txt
      - FLAGS_use_cuda_managed_memory=true
      # Optional: limit the initial allocation and the total share of GPU memory.
      - FLAGS_initial_gpu_memory_in_mb=200  # reduce the initial pre-allocation
      - FLAGS_fraction_of_gpu_memory_to_use=0.85
      # Optional: change the allocation strategy (to mitigate fragmentation).
      - FLAGS_use_stream_safe_cuda_allocator=true
      # - FLAGS_allocator_strategy=auto_growth
      # - FLAGS_use_cuda_managed_memory=false
      - DEBUG_DUMP_DIR=/app/temp_files/debug
      - DEBUG_DUMP_ENABLE=1
    # entrypoint: ["/bin/bash", "/app/patch-entrypoint.sh"]
    command: *celery_cmd
    restart: unless-stopped
    healthcheck:
      # NOTE(review): no "-d <node>" destination is given, so this pings
      # whichever workers answer on the broker, not necessarily this
      # container's own node - confirm that is the intended health signal.
      test: ["CMD", "celery", "-A", "worker.celery_worker.celery_app", "inspect", "ping"]
      interval: 60s
      timeout: 10s
      retries: 3
    # Intended for Swarm mode only (original note: ignored by stand-alone compose).
    # NOTE(review): Docker Compose v2 may honour deploy.resources outside Swarm
    # as well - verify whether the limits below apply to local runs.
    deploy:
      replicas: 1  # desired number of workers
      restart_policy:
        condition: any
      resources:
        limits:
          memory: 1.5G  # estimate: 2 children x LaMa model-loading memory
        reservations:
          devices:
            - capabilities: ["gpu"]  # GPU reservation for Swarm