"""Scrape Naver Shopping search results for a keyword.

Two independent paths are provided:

* ``ns`` fetches the search page with ``requests`` and reads the product list
  out of the embedded ``__NEXT_DATA__`` JSON payload.
* ``run`` drives a real Chromium window through Playwright and parses the
  rendered HTML with ``extract_product_info``.
"""

import json
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright

_SEARCH_URL = "https://search.shopping.naver.com/search/all"
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
)
# Seconds to wait for the HTTP response before giving up.
_REQUEST_TIMEOUT = 10


def run(playwright, keyword):
    """Open the search page in a visible Chromium window and print its products.

    Args:
        playwright: The object yielded by ``sync_playwright()``.
        keyword: Search keyword; it is URL-encoded before being sent.
    """
    browser = playwright.chromium.launch(headless=False)  # headless mode disabled
    try:
        context = browser.new_context(
            user_agent=_USER_AGENT,
            locale="en-US",  # language setting
            timezone_id="America/New_York",  # timezone setting
        )
        page = context.new_page()

        # Hide navigator.webdriver. This has to be an init script: a plain
        # page.evaluate() before navigation only touches the blank page and is
        # discarded as soon as goto() loads the real document.
        page.add_init_script(
            "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        )
        print("브라우저 열기 및 사용자 에이전트 설정 완료")

        query = urlencode({"query": keyword, "bt": "-1", "frm": "NVSCPRO"})
        page.goto(f"{_SEARCH_URL}?{query}")
        print("페이지 로딩 완료")

        # Short pause before reading the page.
        page.wait_for_timeout(1000)
        print("페이지 로딩 완료")

        # Grab the rendered HTML.
        html = page.content()
        print(html)

        soup = BeautifulSoup(html, 'html.parser')
        products = extract_product_info(soup)
        for product in products:
            # Each product is a plain dict (see extract_product_info).
            print(product)
    finally:
        # Always release the browser, even when scraping fails midway.
        browser.close()


def _find_child(parent, tag_name, class_name):
    """Return the first ``tag_name`` with ``class_name`` under ``parent``, or None."""
    if parent is None:
        return None
    return parent.find(tag_name, class_=class_name)


def _find_by_label(container, tag_name, label):
    """Return the first ``tag_name`` under ``container`` whose text contains ``label``.

    A substring match on the visible text is used because an exact
    ``find(..., text=label)`` never matches a tag that also holds child
    elements (its ``.string`` is None in that case).
    """
    if container is None:
        return None
    for tag in container.find_all(tag_name):
        if label in tag.get_text():
            return tag
    return None


def _stripped_text(tag):
    """Return the stripped text of ``tag``, or None when the tag is missing."""
    return tag.text.strip() if tag is not None else None


def _em_text(tag, default='0'):
    """Return the text of the ``<em>`` inside ``tag``, or ``default`` if absent."""
    if tag is None:
        return default
    em = tag.find('em')
    return em.text if em is not None else default


def extract_product_info(soup):
    """Extract one info dict per product card from a parsed search page.

    NOTE(review): the CSS class names below carry build-specific hash
    suffixes, so they presumably go stale when the site is redeployed --
    verify them against the live markup.

    Args:
        soup: ``BeautifulSoup`` document of the search result page.

    Returns:
        A list of dicts with the keys ``title``, ``thumbnail_url``, ``price``,
        ``delivery_fee``, ``categories``, ``review_count``, ``purchase_count``,
        ``registration_date`` and ``zzim_count``. Missing elements yield
        ``None`` (or ``'0'`` for the count fields) instead of raising.
    """
    product_info_list = []

    products = soup.find_all('div', class_='basicList_info_area__17Xyo')
    print(f"products : {products}")

    for i, product in enumerate(products, 1):
        # Product name
        title_link = product.find('a', class_='product_link__TrAac')
        title = title_link.get('title') if title_link is not None else None
        print(f"{i}번째 상품 title : {title}")

        # Thumbnail URL
        thumbnail_element = product.find('a', class_='thumbnail_thumb__Bxb6Z')
        thumbnail_img = (
            thumbnail_element.find('img') if thumbnail_element is not None else None
        )
        thumbnail_url = thumbnail_img.get('src') if thumbnail_img is not None else None
        print(f"{i}번째 상품 thumbnail_url : {thumbnail_url}")

        # Price and delivery fee
        price_area = product.find('div', class_='product_price_area__eTg7I')
        price = _stripped_text(_find_child(price_area, 'span', 'price_num__S2p_v'))
        print(f"{i}번째 상품 price : {price}")
        delivery_fee = _stripped_text(
            _find_child(price_area, 'span', 'price_delivery__yw_We')
        )
        print(f"{i}번째 상품 delivery_fee : {delivery_fee}")

        # Categories
        categories = [
            cat.text
            for cat in product.find_all('span', class_='product_category__l4FWz')
        ]
        print(f"{i}번째 상품 categories : {categories}")

        # Reviews, purchase count, registration date, wish-list ("zzim") count
        etc_info = product.find('div', class_='product_etc_box__ElfVA')
        review_count = _em_text(_find_by_label(etc_info, 'a', '리뷰'))
        print(f"{i}번째 상품 review_count : {review_count}")
        purchase_count = _em_text(_find_by_label(etc_info, 'span', '구매건수'))
        print(f"{i}번째 상품 purchase_count : {purchase_count}")
        registration_span = _find_by_label(etc_info, 'span', '등록일')
        registration_date = (
            registration_span.text.replace('등록일 ', '')
            if registration_span is not None
            else None
        )
        print(f"{i}번째 상품 registration_date : {registration_date}")
        zzim_count = _em_text(_find_child(etc_info, 'a', 'product_btn_zzim__MQ17u'))
        print(f"{i}번째 상품 zzim_count : {zzim_count}")

        # Store the collected fields.
        product_info_list.append({
            'title': title,
            'thumbnail_url': thumbnail_url,
            'price': price,
            'delivery_fee': delivery_fee,
            'categories': categories,
            'review_count': review_count,
            'purchase_count': purchase_count,
            'registration_date': registration_date,
            'zzim_count': zzim_count,
        })
        print(f"{i}번째 상품 정보 추가 완료")

    return product_info_list


def _summarize_item(item):
    """Pick the reported fields out of one ``item`` dict of the search payload."""
    return {
        "productTitle": item.get("productTitle"),
        "price": item.get("price"),
        "imageUrl": item.get("imageUrl"),
        "rank": item.get("rank"),
        "purchase": item.get("purchaseCnt"),
        "review": item.get("reviewCountSum"),
    }


def ns(keyword):
    """Fetch Naver Shopping search results over HTTP and print a summary.

    The product list is read from the ``__NEXT_DATA__`` JSON embedded in the
    page (``props.pageProps.initialState.products.list``).

    Args:
        keyword: Search keyword; it is URL-encoded before being sent.

    Returns:
        A list of dicts with the keys ``productTitle``, ``price``,
        ``imageUrl``, ``rank``, ``purchase`` and ``review``. An empty list is
        returned (after printing the error) when the request or parsing fails.
    """
    # Build the query with urlencode so non-ASCII or reserved characters in
    # the keyword cannot corrupt the URL.
    query = urlencode({
        "query": keyword,
        "frm": "NVSHATC",
        "pagingIndex": 1,
        "pagingSize": 40,
        "productSet": "overseas",
        "sort": "rel",
        "timestamp": "",
        "viewType": "list",
    })
    url = f"{_SEARCH_URL}?{query}"

    print("네이버 카테코드는 [생략]] 입니다.")
    print(f"대상키워드는 [{keyword}] 입니다.")

    headers = {
        "User-Agent": _USER_AGENT,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Language": "en-US,en;q=0.9",
        # NOTE(review): advertising "br" presumably requires a brotli decoder
        # to be installed for requests to decode such a response -- confirm.
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",  # Do Not Track request header
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",  # ask for an upgrade to https
        "Cache-Control": "max-age=0",  # ask not to reuse cached content
    }

    # Fetch the search page and pull the product list out of its JSON payload.
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
        print(f"response : {response}")
        response.raise_for_status()  # raise if the request failed

        soup = BeautifulSoup(response.text, 'html.parser')
        next_data_tag = soup.find("script", {"id": "__NEXT_DATA__"})
        if next_data_tag is None or not next_data_tag.string:
            raise ValueError("__NEXT_DATA__ script tag not found in response")

        next_data_json = json.loads(next_data_tag.string)
        products_list = next_data_json["props"]["pageProps"]["initialState"]["products"]["list"]

        # Build one summary dict per product.
        products_info = [
            _summarize_item(product.get("item") or {}) for product in products_list
        ]
    except (
        requests.RequestException,
        ValueError,  # includes json.JSONDecodeError
        KeyError,
        TypeError,
        AttributeError,
    ) as e:
        print(f"Exception : {e}")
        return []

    print(f"키워드 검색 결과 상품 [{keyword}]에 대한 [{len(products_info)}]개의 상품정보수집 완료")
    print(f"products_info \n {products_info}")
    return products_info


# Name kept as-is (including its spelling) so existing references keep working.
keywrod = "방폭등"

if __name__ == "__main__":
    ns(keywrod)

    # Browser-based alternative:
    # with sync_playwright() as playwright:
    #     run(playwright, keywrod)