98 lines
3.4 KiB
Python
98 lines
3.4 KiB
Python
import undetected_chromedriver as uc
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium_stealth import stealth
|
|
import re
|
|
import json
|
|
import time
|
|
import logging
|
|
|
|
# Finds the start of the `window.__PRELOADED_STATE__ = {...}` assignment. Only
# the position is taken from the regex; the JSON value itself is parsed by a
# real JSON decoder so that a "};" inside a string value cannot truncate it.
_PRELOADED_STATE_ASSIGNMENT = re.compile(r'window\.__PRELOADED_STATE__\s*=\s*(?=\{)')


def _extract_preloaded_state(html: str) -> "dict | None":
    """Parse the object assigned to ``window.__PRELOADED_STATE__`` in *html*.

    Returns:
        The decoded JSON object, or ``None`` when the assignment is not present.

    Raises:
        json.JSONDecodeError: The assignment exists but its value is not valid JSON.
    """
    found = _PRELOADED_STATE_ASSIGNMENT.search(html)
    if found is None:
        return None
    # raw_decode() consumes exactly one JSON value starting at the given index
    # and ignores whatever follows it (the trailing ";" and the rest of the page).
    state, _ = json.JSONDecoder().raw_decode(html, found.end())
    return state


def _apply_stealth(driver) -> None:
    """Configure selenium_stealth, the implicit wait and the navigator overrides on *driver*."""
    # selenium_stealth settings.
    # NOTE(review): "Google Inc. " has a trailing space and "intel Inc. " is
    # lower-case with a trailing space; they look like typos but are kept as-is
    # because they are runtime values -- confirm the intended fingerprint.
    stealth(
        driver,
        vendor="Google Inc. ",
        platform="Win32",
        webgl_vendor="intel Inc. ",
        renderer="Intel Iris OpenGL Engine",
        fix_hairline=True,
    )

    # Implicit wait: 2 seconds.
    driver.implicitly_wait(2)

    # Apply additional JavaScript overrides to every new document.
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
            Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
            window.navigator.chrome = { runtime: {} };
            Object.defineProperty(navigator, 'languages', {get: () => ['ko-KR', 'en-US', 'en']});
            Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
        """
    })


def fetch_preloaded_state(market_url: str, logger=None) -> dict:
    """Load ``<market_url>/best?cp=1`` in headless Chrome and return its preloaded state.

    The page is opened with undetected_chromedriver, and the JSON object assigned
    to ``window.__PRELOADED_STATE__`` is extracted from the page source.

    Args:
        market_url: Base URL of the store; trailing slashes are stripped before
            ``/best?cp=1`` is appended.
        logger: Optional logger-like object. When given, progress is logged at
            debug level (including the full page HTML) and failures at error level.

    Returns:
        The decoded ``__PRELOADED_STATE__`` object, or an empty dict when it is
        not found in the page or any error occurs while fetching or parsing it.
    """
    best_url = market_url.rstrip('/') + "/best?cp=1"
    if logger:
        logger.debug(f"Fetching best URL: {best_url}")

    options = uc.ChromeOptions()
    options.headless = True
    options.add_argument("--headless=new")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument('--disable-popup-blocking')
    options.add_argument('--remote-debugging-port=9222')

    # Bound before the try so the cleanup in `finally` can tell whether the
    # browser was ever started.
    driver = None
    try:
        # Request CDP events and incognito mode.
        driver = uc.Chrome(options=options, enable_cdp_events=True, incognito=True)
        _apply_stealth(driver)

        driver.get(best_url)

        # NOTE: an explicit WebDriverWait (up to 30 s) on
        # `typeof window.__PRELOADED_STATE__ !== 'undefined'` is currently
        # disabled; a fixed pause is used instead.
        time.sleep(3)

        # Debugging: log the current URL and the full page HTML.
        current_url = driver.current_url
        html = driver.page_source
        if logger:
            logger.debug(f"Current URL: {current_url}")
            logger.debug("HTML snippet:")
            logger.debug(html)

        # Extract __PRELOADED_STATE__.
        state = _extract_preloaded_state(html)
        if state is None:
            if logger:
                logger.error("Could not find __PRELOADED_STATE__ in the page")
            return {}

        if logger:
            logger.debug("Fetched __PRELOADED_STATE__ successfully")
        return state
    except Exception as e:
        # Top-level boundary: callers expect an empty dict rather than an exception.
        if logger:
            logger.error(f"Error fetching __PRELOADED_STATE__: {e}", exc_info=True)
        return {}
    finally:
        # Always release the browser, and never let a failing quit() discard a
        # state that was already parsed successfully.
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                if logger:
                    logger.debug("Ignoring error while quitting the driver", exc_info=True)
|
|
|
|
# Example test run: fetch the preloaded state of a sample store and pretty-print it.
if __name__ == "__main__":
    import pprint

    # Show debug-level output so the fetch progress is visible on the console.
    logging.basicConfig(level=logging.DEBUG)
    demo_logger = logging.getLogger("TestLogger")

    demo_url = "https://smartstore.naver.com/modeuda"
    fetched = fetch_preloaded_state(demo_url, demo_logger)

    pprint.pprint(fetched)
|