# baidu_web/mainProcessor.py

import os, time
from xlsReader import ExcelReader
from databaseManager import DatabaseManager
from imageDownloader import ImageDownloader
from imgSearcher import BaiduImageSearcher
from resultDiag import ProductViewer
import pandas as pd
from datetime import datetime
from PySide6.QtCore import QObject, Signal
import xlwings as xw


class MainProcessor(QObject):
    """Drive the product pipeline and report back through Qt signals.

    The methods of this class read products from Excel files, store them in
    the database, download each product image, run a Baidu image search for
    it, show the stored results in a viewer and export selected results to
    Excel workbooks.
    """
    update_progress_signal = Signal(int)  # progress update; emitted with an int percentage
    log_message_signal = Signal(str)     # log message text
    finished_signal = Signal()           # emitted when process_all_products has finished

    def __init__(self, base_path, excel_folder, db_path, img_folder, main_window, logger):
        """Create the collaborators used by the processing pipeline.

        Args:
            base_path: Root path handed to the database manager and the image
                searcher; also used to locate the Excel export template.
            excel_folder: Folder passed to ``ExcelReader``.
            db_path: Database location passed to ``DatabaseManager``.
            img_folder: Folder passed to ``ImageDownloader``.
            main_window: Main window object; its ``price_spinbox`` is read
                when product processing starts.
            logger: Logger shared with every collaborator.
        """
        super().__init__()  # initialise the QObject base

        # Plain references first; none of these depend on each other.
        self.logger = logger
        self.main_window = main_window
        self.base_path = base_path

        # Collaborators are built in a fixed order because the later ones
        # receive the earlier ones as arguments.
        self.excel_reader = ExcelReader(excel_folder, logger)
        self.db_manager = DatabaseManager(self.base_path, db_path, logger)
        self.resultViewer = ProductViewer(self.db_manager, logger)
        self.image_downloader = ImageDownloader(img_folder, logger)

        # Result sources handed to the image searcher.
        search_sources = ['淘宝', 'tmall', '1688']
        self.image_searcher = BaiduImageSearcher(
            self.base_path,
            sources=search_sources,
            image_downloader=self.image_downloader,
            db_manager=self.db_manager,
            logger=logger,
        )

        self.logger.info("MainProcessor initialized.")

    def process_all_products(self):
        """Run the full pipeline: load Excel rows, store them, image-search each one.

        Reads the price filter from the main window's spin box, inserts the
        products read from the Excel files into the database, then for every
        product downloads its image, runs the image search (with retries) and
        stores the extracted results.

        Progress is reported through ``update_progress_signal`` as the
        percentage of successfully processed products relative to the product
        count reported by the database. Messages go through
        ``log_message_signal``. ``finished_signal`` is always emitted at the
        end, whether or not an error occurred. Exceptions are logged and
        reported, never propagated to the caller.
        """
        try:
            # Price filter threshold comes from the spin box in the main window.
            price_filter = int(self.main_window.price_spinbox.value())
            self.logger.debug(f"price_filter : {price_filter}")

            # Read every Excel file and store the products in the database.
            products = self.excel_reader.read_excel_files()
            product_ids = self.db_manager.insert_products(products, price_filter)

            # Denominator for the progress percentage is the DB product count.
            total_products = self.db_manager.count_products()
            processed_count = 0

            self.logger.info("Starting product processing...")
            self.log_message_signal.emit("Starting product processing...")

            # Started outside the inner try so close_browser() is only called
            # for a browser that was actually started.
            self.image_searcher.start_browser()
            try:
                # NOTE(review): zip() assumes insert_products returns one id
                # per product, in the same order — confirm against DatabaseManager.
                for product, product_id in zip(products, product_ids):
                    try:
                        if not self._process_single_product(product, product_id):
                            continue

                        # Count the product and publish the new percentage.
                        processed_count += 1
                        if total_products:  # avoid dividing by zero / None
                            progress = int((processed_count / total_products) * 100)
                            self.update_progress_signal.emit(progress)

                    except Exception as e:
                        # One failing product must not abort the whole run.
                        self.log_message_signal.emit(f"Error processing product ID [{product_id}]: {str(e)}")
                        self.logger.error(f"Error processing product ID [{product_id}]: {e}", exc_info=True)
            finally:
                # Always release the browser, even if the loop itself blew up.
                self.image_searcher.close_browser()
                self.log_message_signal.emit("Image searcher browser closed.")
                self.logger.info("Image searcher browser closed.")
        except Exception as e:
            self.log_message_signal.emit(f"Unexpected error: {str(e)}")
            self.logger.error(f"Unexpected error in process_all_products: {e}", exc_info=True)

        finally:
            self.finished_signal.emit()

    def _process_single_product(self, product, product_id):
        """Download, search and store the results for one product.

        Args:
            product: Product mapping; ``product['image_url']`` is read and
                ``product['saved_img_path']`` is written.
            product_id: Database id of the product, or ``None`` when the
                insert failed.

        Returns:
            ``True`` when search results were stored, ``False`` when the
            product was skipped (insert failed or all search attempts failed).
        """
        if product_id is None:
            self.logger.error("Failed to insert product into database.")
            self.log_message_signal.emit("Failed to insert product into database.")
            return False

        # Download the image and record where it was saved.
        image_path = self.image_downloader.download_image(product['image_url'], product_id)
        product['saved_img_path'] = image_path
        self.db_manager.update_product_image_path(product_id, image_path)

        succeeded, search_results = self._search_with_retries(product_id, image_path)
        if not succeeded:
            return False

        # Store the search results for this product.
        self.logger.debug(f"Insert DB: {product_id}")
        self.db_manager.insert_search_results(product_id, search_results)
        self.log_message_signal.emit(f"Search results for product ID [{product_id}] inserted into database.")
        return True

    def _search_with_retries(self, product_id, image_path, max_retries=3, retry_delay=1):
        """Upload the image and extract search results, retrying on failure.

        Each attempt returns to the initial search page, uploads the image and
        expands the results; a failure of either step counts as one attempt.

        Args:
            product_id: Database id used for logging and result extraction.
            image_path: Path of the downloaded image to upload.
            max_retries: Maximum number of attempts.
            retry_delay: Seconds to sleep after a failed attempt.

        Returns:
            ``(True, results)`` on success, ``(False, None)`` once every
            attempt has failed. A flag is returned separately because the
            extracted results themselves are passed on unchecked.
        """
        for attempt in range(1, max_retries + 1):
            self.image_searcher.goto_initialPage()
            self.logger.debug("검색페이지로 가기")

            if not self.image_searcher.upload_image(image_path):
                self.logger.warning(f"Upload image failed for Product ID [{product_id}]. Retry {attempt}/{max_retries}")
                time.sleep(retry_delay)
                continue

            if not self.image_searcher.expand_results():
                self.log_message_signal.emit(f"No results extracted for product ID [{product_id}]. Retry {attempt}/{max_retries}.")
                time.sleep(retry_delay)
                continue

            # Both steps succeeded: extract the results and stop retrying.
            search_results = self.image_searcher.extract_product_data(product_id)
            self.logger.debug(f"search_results : {search_results}")
            return True, search_results

        # Every attempt failed; tell the user and let the caller skip the product.
        self.log_message_signal.emit(f"Failed to process product ID [{product_id}] after {max_retries} attempts. Skipping.")
        return False, None

    # def show_results(self):
    #     # 검색 결과 출력
    #     try:
    #         print("show_results")
    #         self.resultViewer = ProductViewer(self.db_manager.conn)
    #         self.resultViewer.show()
    #         self.logger.debug(f"show_results Completed")
    #     except Exception as e:
    #         self.logger.warning(f"Failed to show_results - {e}", exc_info=True)

    def show_results(self):
        """Show the search-result viewer, creating or refreshing it first.

        If no viewer exists yet (attribute missing or ``None``) a new
        ``ProductViewer`` is created; otherwise the existing viewer is
        refreshed from the database connection. Any exception is logged as a
        warning and not re-raised.
        """
        try:
            self.logger.debug("show_results")

            if getattr(self, 'resultViewer', None) is None:
                # Build the viewer with the same arguments __init__ uses;
                # the previous call here passed only the raw connection.
                self.logger.debug("ProductViewer 객체 생성")
                self.resultViewer = ProductViewer(self.db_manager, self.logger)
            else:
                # NOTE(review): refresh_data is given the raw connection while
                # the constructor gets the manager — confirm against ProductViewer.
                self.logger.debug("ProductViewer refresh_data")
                self.resultViewer.refresh_data(self.db_manager.conn)

            self.resultViewer.show()
            self.logger.debug("show_results Completed")
        except Exception as e:
            self.logger.warning(f"Failed to show_results - {e}", exc_info=True)

    def export_to_xls(self):
        """Export the selected search results into copies of the Excel template.

        Rows are read from ``products`` joined with ``search_results`` where
        ``is_selected = 1``. ``None`` values become empty strings and spaces
        in the tag are replaced by commas. The rows are written in batches of
        50 into copies of ``<base_path>/src/BaseXLS.xlsx``, starting at row 4
        of the first sheet (columns B, C, D, F and G). Each batch is saved as
        ``출력데이터_<YYYYMMDD>_<n>.xlsx``.

        A single hidden Excel instance is used for all batches and is always
        shut down afterwards. Errors are logged and reported through
        ``log_message_signal``; they are not re-raised.
        """
        rows_per_file = 50   # number of data rows written into each output file
        first_data_row = 4   # data starts at B4 in the template

        try:
            self.log_message_signal.emit("Start Export Excel Job")

            # Date stamp and template location.
            date_str = datetime.now().strftime('%Y%m%d')
            base_filename = os.path.join(self.base_path, "src", "BaseXLS.xlsx")

            if not os.path.exists(base_filename):
                self.logger.error(f"BaseXLS.xlsx 파일이 존재하지 않습니다: {base_filename}")
                self.log_message_signal.emit(f"BaseXLS.xlsx 파일이 존재하지 않습니다: {base_filename}")
                return

            # Fetch the selected rows.
            # NOTE(review): presumably db_manager.conn is a sqlite3 connection
            # used as a transaction context manager — confirm.
            with self.db_manager.conn as conn:
                cursor = conn.execute('''
                    SELECT
                        sr.original_url,
                        p.name,
                        p.price,
                        p.tag,
                        p.percenty_category
                    FROM products p
                    JOIN search_results sr ON p.id = sr.product_id
                    WHERE sr.is_selected = 1
                ''')
                data = cursor.fetchall()

            # Replace None with "" and turn spaces in the tag into commas.
            cleaned_data = [
                [
                    "" if original_url is None else original_url,
                    "" if name is None else name,
                    "" if price is None else price,
                    "" if tag is None else tag.replace(" ", ","),
                    "" if category is None else category,
                ]
                for original_url, name, price, tag, category in data
            ]

            if not cleaned_data:
                # Nothing selected: do not start Excel at all.
                self.logger.info("No selected search results to export.")
                self.log_message_signal.emit("No selected search results to export.")
                return

            # One hidden Excel instance serves every batch; the finally block
            # guarantees it is shut down even when a write or save fails.
            app = xw.App(visible=False)
            try:
                batch_starts = range(0, len(cleaned_data), rows_per_file)
                for idx, start in enumerate(batch_starts, start=1):
                    chunk = cleaned_data[start:start + rows_per_file]
                    output_filename = f"출력데이터_{date_str}_{idx}.xlsx"

                    # Open a fresh copy of the template in *this* app instance.
                    workbook = app.books.open(base_filename)
                    try:
                        sheet = workbook.sheets[0]  # first sheet of the template

                        # Columns B-D (original_url, name, price) and F-G
                        # (tag, percenty_category) are each written as one
                        # 2-D block; column E is left untouched.
                        sheet.range(f"B{first_data_row}").value = [row[:3] for row in chunk]
                        sheet.range(f"F{first_data_row}").value = [row[3:] for row in chunk]

                        workbook.save(output_filename)
                    finally:
                        workbook.close()

                    # Report the real number of rows; the last batch may be short.
                    self.logger.info(f"{output_filename} 파일에 데이터 {len(chunk)}개 저장 완료.")
                    self.log_message_signal.emit(f"{output_filename} 파일에 데이터 {len(chunk)}개 저장 완료.")
            finally:
                app.quit()

        except Exception as e:
            self.logger.error(f"Error exporting to Excel with template: {e}", exc_info=True)
            self.log_message_signal.emit(f"Error exporting to Excel with template: {e}")