# baidu_web/mainProcessor.py

import os, time
from xlsReader import ExcelReader
from databaseManager import DatabaseManager
from imageDownloader import ImageDownloader
from imgSearcher import BaiduImageSearcher
from resultDiag import ProductViewer
import pandas as pd
from datetime import datetime

class MainProcessor:
    """Coordinate the Excel -> database -> image search -> Excel export workflow.

    The processor wires together the collaborators created in ``__init__``:
    an Excel reader, a database manager, a result viewer, an image downloader
    and an image searcher. All progress and failures are reported through the
    injected ``logger``.
    """

    # Maximum number of search attempts per product before giving up.
    MAX_RETRIES = 3
    # Pause (seconds) after a failed search attempt, before the next one.
    RETRY_DELAY_SECONDS = 1
    # Pause (seconds) after each successfully processed product.
    PRODUCT_DELAY_SECONDS = 1
    # Maximum number of rows written to a single exported workbook.
    EXPORT_CHUNK_SIZE = 50

    def __init__(self, excel_folder, db_path, img_folder, logger):
        """Create all collaborators used by the processing pipeline.

        Args:
            excel_folder: Folder handed to ``ExcelReader``.
            db_path: Database path handed to ``DatabaseManager`` and
                ``ProductViewer``.
            img_folder: Folder handed to ``ImageDownloader``.
            logger: Logger-like object (``debug``/``info``/``warning``/
                ``error``) shared with the collaborators.
        """
        self.logger = logger
        self.excel_reader = ExcelReader(excel_folder, logger)
        self.db_manager = DatabaseManager(db_path, logger)
        self.logger.debug("1")
        self.resultViewer = ProductViewer(db_path)
        self.logger.debug("2")
        self.image_downloader = ImageDownloader(img_folder, logger)
        self.image_searcher = BaiduImageSearcher(
            sources=['淘宝', 'tmall', '1688'],
            image_downloader=self.image_downloader,
            logger=logger,
        )
        self.logger.info("MainProcessor initialized.")

    def clean_up_files(self):
        """Delete every regular file directly inside ``./img`` and ``./xls``.

        Both folders are resolved against the current working directory.
        Sub-directories are left untouched. A folder that does not exist is
        skipped with a warning instead of aborting the whole clean-up, and a
        failure to delete one file does not stop the remaining deletions.
        """
        base_dir = os.getcwd()
        for folder in (os.path.join(base_dir, 'img'), os.path.join(base_dir, 'xls')):
            if not os.path.isdir(folder):
                self.logger.warning(f"Folder not found, skipping clean-up: {folder}")
                continue

            try:
                filenames = os.listdir(folder)
            except OSError as e:
                self.logger.error(f"Error listing folder {folder}: {e}")
                continue

            for filename in filenames:
                file_path = os.path.join(folder, filename)
                try:
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                        self.logger.info(f"Deleted file: {file_path}")
                except OSError as e:
                    self.logger.error(f"Error deleting file {file_path}: {e}")

    def _search_with_retries(self, product_id, image_path):
        """Run the image search for one product, retrying failed attempts.

        Each attempt reloads the search page, uploads the image and expands
        the result list; a falsy return from either step counts as a failed
        attempt.

        Args:
            product_id: Database id of the product being searched.
            image_path: Local path of the image to upload.

        Returns:
            ``(True, results)`` when an attempt succeeded, where ``results``
            is whatever ``extract_product_data`` returned, or ``(False, None)``
            once ``MAX_RETRIES`` attempts have failed.
        """
        for attempt in range(1, self.MAX_RETRIES + 1):
            self.image_searcher.goto_initialPage()
            self.logger.debug("검색페이지로 가기")

            if not self.image_searcher.upload_image(image_path):
                self.logger.warning(f"Upload image failed for Product ID [{product_id}]. Retry {attempt}/{self.MAX_RETRIES}")
                time.sleep(self.RETRY_DELAY_SECONDS)
                continue

            if not self.image_searcher.expand_results():
                self.logger.warning(f"Expand results failed for Product ID [{product_id}]. Retry {attempt}/{self.MAX_RETRIES}")
                time.sleep(self.RETRY_DELAY_SECONDS)
                continue

            # An empty result list is deliberately not treated as a failure.
            return True, self.image_searcher.extract_product_data(product_id)

        return False, None

    def process_all_products(self):
        """Import products from Excel and store image-search results for each.

        Products are read from the Excel files and inserted into the database.
        For every product that received an id, its image is downloaded, the
        saved path is recorded, the image search is run (with retries) and the
        results are written to the database. A failure for one product is
        logged and never stops the remaining products.
        """
        # Read every Excel file and persist the products.
        products = self.excel_reader.read_excel_files()
        product_ids = self.db_manager.insert_products(products)

        # NOTE(review): the browser is started here but never closed in this
        # class; confirm whether BaiduImageSearcher exposes a shutdown method.
        self.image_searcher.start_browser()

        for product, product_id in zip(products, product_ids):
            try:
                if product_id is None:
                    self.logger.error("Failed to insert product into database.")
                    continue

                image_path = self.image_downloader.download_image(product['image_url'], product_id)
                # Assumes download_image returns a falsy value on failure
                # (TODO confirm); without an image there is nothing to upload.
                if not image_path:
                    self.logger.error(f"Image download failed for Product ID [{product_id}]; skipping search.")
                    continue

                product['saved_img_path'] = image_path
                self.db_manager.update_product_image_path(product_id, image_path)

                succeeded, search_results = self._search_with_retries(product_id, image_path)
                if not succeeded:
                    self.logger.error(f"Failed to process Product ID [{product_id}] after {self.MAX_RETRIES} attempts.")
                    continue

                # Persist the search results of the successful attempt.
                self.logger.debug(f"Insert DB: {product_id}")
                self.db_manager.insert_search_results(product_id, search_results)

                self.logger.debug(f"Processed product ID: {product_id}")
                time.sleep(self.PRODUCT_DELAY_SECONDS)

            except Exception as e:
                # Per-product boundary: log and move on to the next product.
                self.logger.warning(f"Failed to process product ID: {product_id} - {e}", exc_info=True)

    def show_results(self):
        """Open the result viewer; failures are logged rather than raised."""
        try:
            self.resultViewer.show()
            self.logger.debug("show_results Completed")
        except Exception as e:
            self.logger.warning(f"Failed to show_results - {e}", exc_info=True)

    def export_to_xls(self):
        """Export the joined product/search-result rows to Excel workbooks.

        Rows are split into chunks of ``EXPORT_CHUNK_SIZE``; each chunk is
        written to ``출력데이터_<YYYYMMDD>_<n>.xlsx`` in the current working
        directory, on a sheet named ``multi_ss``. Data starts on sheet row 4
        with ``original_url`` in column B, ``name`` in C, ``tag`` in F and
        ``percenty_category`` in G. The file index restarts at 1 on every
        call, so exporting twice on the same day overwrites earlier files.

        Any error is logged; nothing is raised to the caller.
        """
        # The file name carries only the current date (no time component).
        date_str = datetime.now().strftime('%Y%m%d')

        try:
            with self.db_manager.conn as conn:
                cursor = conn.execute('''
                    SELECT s.original_url, p.name, p.tag, p.percenty_category
                    FROM products p
                    JOIN search_results s ON p.id = s.product_id
                ''')
                data = cursor.fetchall()

            chunk_starts = range(0, len(data), self.EXPORT_CHUNK_SIZE)
            for file_index, start in enumerate(chunk_starts, start=1):
                chunk = data[start:start + self.EXPORT_CHUNK_SIZE]
                excel_filename = f"출력데이터_{date_str}_{file_index}.xlsx"

                # Cell assignment by coordinate is an openpyxl feature, so the
                # engine is pinned instead of relying on pandas' default
                # (which prefers xlsxwriter when that package is installed).
                with pd.ExcelWriter(excel_filename, engine="openpyxl") as writer:
                    worksheet = writer.book.create_sheet("multi_ss")
                    # Only the explicit placement is written. Dumping the
                    # DataFrame as well left shifted copies in columns D/E
                    # and a duplicated last record one row below the data.
                    for row_idx, (original_url, name, tag, category) in enumerate(chunk, start=4):
                        worksheet[f"B{row_idx}"] = original_url
                        worksheet[f"C{row_idx}"] = name
                        worksheet[f"F{row_idx}"] = tag
                        worksheet[f"G{row_idx}"] = category

                # Report the real row count; the last chunk may be short.
                self.logger.info(f"{excel_filename} 파일에 데이터 {len(chunk)}개 저장 완료.")
        except Exception as e:
            self.logger.error(f"Error exporting to Excel: {e}", exc_info=True)