# tao2/modules/tao2.py

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

import time
import os
from bs4 import BeautifulSoup
import re
from urllib.request import urlretrieve
from time import sleep
import requests
from cookie_manager import load_cookies, save_cookies, check_login_status
import logging

# Fetch the logger instance used throughout this script.
logger = logging.getLogger('default_logger')

# Image-search entry page opened by the Selenium session.
SEARCH_PAGE_URL = "https://world.taobao.com/wow/tmg-fc/tmw/search_image?"
# Selector for the first product element on the result page; also used as the
# "the user has finished logging in" signal below.
FIRST_PRODUCT_CSS = ".rax-view-v2:nth-child(1) > .rax-view-v2 > .mobile--class-1--2Vz4bM4"
# XPath of the "no matching items" banner shown by the result page.
SORRY_MESSAGE_XPATH = "//span[contains(.,'Sorry，没有找到相关的宝贝！！')]"
# Selector matching one anchor per product in the result list.
PRODUCT_LINK_CSS = 'a.mobile--class-1--2Vz4bM4'


def _with_https_scheme(url):
    """Return *url* prefixed with ``https:`` unless it already has a scheme.

    Values without an http(s) scheme (e.g. protocol-relative ``//host/path``)
    get ``https:`` prepended; values that are already absolute are returned
    unchanged so they are not turned into ``https:https://...``.
    """
    if url.startswith(("http://", "https://")):
        return url
    return "https:" + url


def _wait_for_first_product(browser, max_attempts):
    """Refresh the result page until the first product element is present.

    Each attempt waits up to 5 seconds for the "Sorry" banner. If the banner
    shows up the page is refreshed; otherwise up to 10 more seconds are spent
    waiting for the first product element, refreshing again when it does not
    appear.

    Args:
        browser: The Selenium WebDriver showing the search result page.
        max_attempts: Maximum number of failed attempts before giving up.

    Raises:
        RuntimeError: If the first product never appears within
            ``max_attempts`` attempts.
    """
    for _ in range(max_attempts):
        try:
            WebDriverWait(browser, 5).until(
                EC.visibility_of_element_located((By.XPATH, SORRY_MESSAGE_XPATH))
            )
        except TimeoutException:
            # No "Sorry" banner: wait for the first product to load instead.
            try:
                WebDriverWait(browser, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, FIRST_PRODUCT_CSS))
                )
            except TimeoutException:
                # The first product did not load either; refresh and retry.
                logger.debug("첫 번째 상품이 로드되지 않았습니다. 다시 시도합니다.")
                browser.refresh()
            else:
                logger.debug("첫 번째 상품이 로드되었습니다.")
                return
        else:
            logger.debug("Sorry 메시지가 감지되었습니다. 페이지를 새로고침합니다.")
            browser.refresh()
            time.sleep(3)  # give the page a moment after the refresh
    raise RuntimeError("최대 시도 횟수를 초과했습니다. 첫 번째 상품을 찾을 수 없습니다.")


def _extract_product(anchor):
    """Build the product-info dict for one parsed result anchor.

    Args:
        anchor: A BeautifulSoup tag matched by ``PRODUCT_LINK_CSS``.

    Returns:
        A dict with the keys "Product Name", "Image URL", "Price",
        "Sales Volume" and "Product URL".

    Raises:
        AttributeError, KeyError, TypeError: When an expected child element
            or attribute is missing from *anchor*.
    """
    return {
        "Product Name": anchor.select_one("span.mobile--summary--2mK9e7G").text,
        "Image URL": _with_https_scheme(anchor.select_one("img")['src']),
        "Price": anchor.select_one("span.mobile--price--3eMQ3ec").text,
        "Sales Volume": anchor.select_one("span.mobile--buy--2I4hwR4").text,
        "Product URL": _with_https_scheme(anchor['href']),
    }


# User-agent header definition.
# NOTE(review): this dict is never passed to the Selenium session or to any
# request in this file, so it currently has no effect -- confirm the intent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}

# The flow intentionally runs at module top level (no __main__ guard) so that
# existing ways of launching this script keep working.
# Initialise the driver; everything after this point runs under try/finally so
# the browser is closed even when a step raises.
driver = webdriver.Chrome()
try:
    driver.get(SEARCH_PAGE_URL)
    logger.debug("셀레니움 시작")

    # Short delay between requests.
    sleep(1)

    # Load previously saved cookies into the session, then reload the page so
    # they take effect.
    load_cookies(driver)
    logger.debug("지난 쿠키 로드 완료")
    driver.refresh()
    logger.debug("페이지 리로드")

    # Check the login state.
    if not check_login_status(driver):
        logger.debug("재로그인 하세요!")
        # Give the user up to 10 minutes to log in manually; the wait ends as
        # soon as the first product element is present.
        # TODO: implement an actual login procedure.
        WebDriverWait(driver, 600).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, FIRST_PRODUCT_CSS))
        )

    # Persist the session cookies.
    save_cookies(driver)
    logger.debug("쿠키저장완료")

    # Download the query image to a local file so it can be uploaded.
    test_image_url = "https://file.percenty.co.kr/public/652bed8e865b1f32ea62bf1f/products/65bb7381a2cce53ffb10e666/fd739720-3aeb-41ca-974b-c933b93f9127.jpg"
    local_image_path = "./img/test_image.jpg"
    os.makedirs("./img", exist_ok=True)
    urlretrieve(test_image_url, local_image_path)
    logger.debug("이미지 저장 완료")

    # Click the image-search button through JavaScript. Wait for the button
    # first so querySelector cannot return null right after the page reload.
    search_button_selector = ".component-search-icon-active"
    WebDriverWait(driver, 60).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, search_button_selector))
    )
    driver.execute_script(
        "document.querySelector(arguments[0]).click();", search_button_selector
    )
    logger.debug("이미지검색버튼 클릭")

    # Upload the image through the file input (absolute path required).
    file_input = WebDriverWait(driver, 60).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "input[type='file']"))
    )
    file_input.send_keys(os.path.abspath(local_image_path))
    logger.debug("검색결과가 로그인이 필요한지에 대한 판단 중....")
    # TODO: handle the case where the search result requires a login.
    logger.debug("로그인이 필요한 경우에 대한 로그인 처리 코드 작성필요")

    # Retry (refreshing the page) until the first product is present.
    max_refresh_attempts = 5
    _wait_for_first_product(driver, max_refresh_attempts)

    # The explicit waits above already confirmed the first product is present;
    # this short fixed pause lets the remaining products finish rendering.
    time.sleep(2)

    # Grab the rendered HTML.
    page_source = driver.page_source
    logger.debug("html파싱")

    # Parse the HTML with BeautifulSoup.
    soup = BeautifulSoup(page_source, 'html.parser')

    # Keep a copy of the page source on disk.
    with open("page_source.html", "w", encoding="utf-8") as dump:
        dump.write(page_source)

    # Maximum number of products to collect.
    item_count = 10

    # Collected product-info dicts.
    products = []
    logger.debug("상품정보리스트 생성")

    # Select every product anchor once and keep only the first item_count.
    product_list = soup.select(PRODUCT_LINK_CSS)

    for i, product in enumerate(product_list[:item_count], start=1):
        try:
            product_info = _extract_product(product)
        except (AttributeError, KeyError, TypeError) as e:
            # A product with missing fields is skipped, not fatal.
            logger.debug(f"Error extracting product {i}: {e}")
            continue
        products.append(product_info)
        logger.debug(f"{i}번째 상품 정보: {product_info}")

    # Log the extracted product information.
    for product in products:
        logger.debug(product)
finally:
    driver.quit()
logger.debug("작업 종료")