# Shared Celery worker command line, reused further down through the
# *celery_cmd alias.
x-celery-cmd: &celery_cmd
  - "celery"
  - "-A"
  - "worker.celery_worker.celery_app"
  - "worker"
  - "-Q"
  - "celery,translate,rembg,inpaint"
  - "-l"
  - "info"
  - "--pool=threads"  # thread pool: no fork
  # NOTE(review): the earlier comment here said "restart every 300 tasks",
  # but the value is 0 - confirm which one is intended.
  - "--max-tasks-per-child=0"
  # Original note: restart after roughly 500 MB of memory use.
  # NOTE(review): Celery documents both per-child limits as prefork-pool
  # options; presumably they have no effect with --pool=threads - verify.
  - "--max-memory-per-child=500000"
  # Machine prefix for the node name.
  # Example result given by the author: 7600M@fd_test-worker-1
  - "--hostname=7600M@%h"


services:
  worker:
    # Image is built from Dockerfile.worker, with this directory as context.
    build:
      context: "."
      dockerfile: "Dockerfile.worker"

    # ───── Important: container_name removed → each container gets a unique, auto-assigned name
    # container_name: celery_worker_remote   ← removed

    # Additional environment variables are read from this file.
    env_file:
      - ".env.worker"
    # Bind mounts; entries ending in :ro are mounted read-only.
    volumes:
      - "./temp_files/debug:/app/temp_files/debug"  # debug artifacts
      - "./worker/modules:/app/modules:ro"
      - "./worker/models:/app/models:ro"
      - "./worker/trt_cache:/app/trt_cache"
      - "./worker/torch_cache:/app/torch_cache"
      # - ./worker/patches/simple_lama/model.py:/patches/model.py:ro
      # - ./worker/patches/patch-entrypoint.sh:/app/patch-entrypoint.sh:ro
      - "/etc/localtime:/etc/localtime:ro"

    # Run under the NVIDIA container runtime (original note: "Compose v2").
    runtime: nvidia
    # Written as a mapping; every value is quoted so it stays a string.
    environment:
      TZ: "Asia/Seoul"
      # Use all GPUs (fine for a single machine with multiple cards).
      NVIDIA_VISIBLE_DEVICES: "all"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
      NVIDIA_REQUIRE_CUDA: "cuda>=11.8"
      OCR_REC_LABEL_FILE: "/app/modules/PP_Models/rec/ppocr_keys_v1.txt"

      FLAGS_use_cuda_managed_memory: "true"
      # Optional: limit the initial allocation / overall share of GPU memory.
      FLAGS_initial_gpu_memory_in_mb: "200"  # reduce the up-front reservation
      FLAGS_fraction_of_gpu_memory_to_use: "0.85"
      # Optional: change the allocation strategy (to ease fragmentation).
      FLAGS_use_stream_safe_cuda_allocator: "true"
      # FLAGS_allocator_strategy: "auto_growth"
      # FLAGS_use_cuda_managed_memory: "false"

      DEBUG_DUMP_DIR: "/app/temp_files/debug"
      DEBUG_DUMP_ENABLE: "1"

    # entrypoint: ["/bin/bash", "/app/patch-entrypoint.sh"]

    # Start the worker with the shared x-celery-cmd argument list.
    command: *celery_cmd

    restart: unless-stopped

    # Ping only this container's own Celery node. Without a destination,
    # `inspect ping` broadcasts to every worker on the broker, so the check
    # would pass as long as any worker anywhere replied.
    # The node name must match --hostname in x-celery-cmd ("7600M@%h").
    # NOTE(review): assumes $HOSTNAME inside the container equals what Celery
    # expands %h to - confirm against the worker's startup banner.
    healthcheck:
      test:
        - CMD-SHELL
        # `$$` is Compose's escape for a literal `$`, so the container's
        # shell - not Compose - expands HOSTNAME.
        - >-
          celery -A worker.celery_worker.celery_app inspect ping
          --destination "7600M@$${HOSTNAME}"
      interval: 60s
      timeout: 10s
      retries: 3

    # ───── Original note: applied only in Swarm mode (ignored by stand-alone
    # compose).
    # NOTE(review): the Compose Deploy Specification describes `docker compose`
    # honouring parts of this section (e.g. resources) - confirm which applies
    # to the runtime in use.
    deploy:
      replicas: 1  # desired number of workers
      restart_policy:
        condition: "any"
      resources:
        limits:
          # Original estimate: "child 2 x LaMa loading memory".
          # NOTE(review): the worker command uses --concurrency=1 - confirm
          # this estimate is still current.
          memory: "1.5G"
        reservations:
          devices:
            # GPU reservation (original note: for Swarm).
            - capabilities:
                - "gpu"