forked from ckh08045/AutoPercenty
188 lines
8.8 KiB
Python
188 lines
8.8 KiB
Python
from playwright.sync_api import sync_playwright
|
|
from bs4 import BeautifulSoup
|
|
import requests, json
|
|
|
|
def run(playwright, keyword):
    """Open the Naver Shopping results for ``keyword`` in Chromium and print them.

    The rendered page HTML is printed, parsed with BeautifulSoup, handed to
    ``extract_product_info`` and each resulting product dict is printed.

    Args:
        playwright: Playwright instance exposing ``chromium.launch`` (the
            object yielded by ``sync_playwright()``).
        keyword: Search term. It is percent-encoded before being placed in
            the query string.
    """
    from urllib.parse import quote

    browser = playwright.chromium.launch(headless=False)  # run with a visible window
    try:
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
            locale="en-US",  # browser language
            timezone_id="America/New_York",  # browser time zone
        )

        # Hide navigator.webdriver. An init script runs in every new document,
        # whereas a one-off page.evaluate() issued before goto() only touches
        # the initial blank page and is lost on navigation.
        context.add_init_script(
            "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        )

        page = context.new_page()
        print("브라우저 열기 및 사용자 에이전트 설정 완료")

        # Encode the keyword so characters such as '&', '#' or spaces cannot
        # break or truncate the query string.
        encoded_keyword = quote(keyword, safe="")
        page.goto(f"https://search.shopping.naver.com/search/all?query={encoded_keyword}&bt=-1&frm=NVSCPRO")
        print("페이지 로딩 완료")

        # Pause briefly before reading the page.
        page.wait_for_timeout(1000)
        print("페이지 로딩 완료")

        # Grab the page HTML.
        html = page.content()
        print(html)

        soup = BeautifulSoup(html, 'html.parser')
        products = extract_product_info(soup)

        # extract_product_info returns plain dicts, which have no ``.text``
        # attribute, so print each dict itself.
        for product in products:
            print(product)
    finally:
        # Always release the browser, even if navigation or parsing failed.
        browser.close()
|
|
|
|
|
|
def _first_text(parent, tag_name, css_class):
    """Return the stripped text of the first matching descendant, or None if absent."""
    if parent is None:
        return None
    node = parent.find(tag_name, class_=css_class)
    return node.text.strip() if node is not None else None


def _find_labelled(parent, tag_name, label):
    """Return the first ``tag_name`` descendant whose text contains ``label``, or None."""
    if parent is None:
        return None
    for node in parent.find_all(tag_name):
        if label in node.get_text():
            return node
    return None


def _em_text(node, default='0'):
    """Return the text of the first <em> inside ``node``, or ``default`` if there is none."""
    em = node.find('em') if node is not None else None
    return em.text if em is not None else default


def extract_product_info(soup):
    """Extract per-product details from a parsed search-result page.

    Every ``div`` with class ``basicList_info_area__17Xyo`` is treated as one
    product. Each extracted value is also printed as it is found.

    Args:
        soup: Parsed document (BeautifulSoup object) to search.

    Returns:
        list[dict]: One dict per product with the keys ``title``,
        ``thumbnail_url``, ``price``, ``delivery_fee``, ``categories``,
        ``review_count``, ``purchase_count``, ``registration_date`` and
        ``zzim_count``. Missing elements yield ``None`` (title, thumbnail,
        price, delivery fee, registration date), ``'0'`` (the counts) or an
        empty list (categories) instead of raising.
    """
    # Accumulates one dict per product.
    product_info_list = []

    products = soup.find_all('div', class_='basicList_info_area__17Xyo')
    print(f"products : {products}")

    for i, product in enumerate(products, 1):
        # Product title (None when the link element is missing).
        title_link = product.find('a', class_='product_link__TrAac')
        title = title_link.get('title') if title_link is not None else None
        print(f"{i}번째 상품 title : {title}")

        # Thumbnail URL (None when the anchor or its <img> is missing).
        thumbnail_element = product.find('a', class_='thumbnail_thumb__Bxb6Z')
        thumbnail_img = thumbnail_element.find('img') if thumbnail_element is not None else None
        thumbnail_url = thumbnail_img.get('src') if thumbnail_img is not None else None
        print(f"{i}번째 상품 thumbnail_url : {thumbnail_url}")

        # Price and delivery fee.
        price_area = product.find('div', class_='product_price_area__eTg7I')
        price = _first_text(price_area, 'span', 'price_num__S2p_v')
        print(f"{i}번째 상품 price : {price}")
        delivery_fee = _first_text(price_area, 'span', 'price_delivery__yw_We')
        print(f"{i}번째 상품 delivery_fee : {delivery_fee}")

        # Categories.
        categories = [cat.text for cat in product.find_all('span', class_='product_category__l4FWz')]
        print(f"{i}번째 상품 categories : {categories}")

        # Reviews, purchase count, registration date, wish-list count.
        # Labels are matched by substring: BeautifulSoup's string filter
        # compares against a tag's ``.string``, which is None as soon as the
        # tag also contains a child element such as the <em> holding the
        # number, so an exact-string match can never find these tags.
        etc_info = product.find('div', class_='product_etc_box__ElfVA')

        review_count = _em_text(_find_labelled(etc_info, 'a', '리뷰'))
        print(f"{i}번째 상품 review_count : {review_count}")

        purchase_count = _em_text(_find_labelled(etc_info, 'span', '구매건수'))
        print(f"{i}번째 상품 purchase_count : {purchase_count}")

        registration_tag = _find_labelled(etc_info, 'span', '등록일')
        if registration_tag is not None:
            registration_date = registration_tag.get_text().replace('등록일 ', '').strip()
        else:
            registration_date = None
        print(f"{i}번째 상품 registration_date : {registration_date}")

        zzim_link = etc_info.find('a', class_='product_btn_zzim__MQ17u') if etc_info is not None else None
        zzim_count = _em_text(zzim_link)
        print(f"{i}번째 상품 zzim_count : {zzim_count}")

        # Store the collected fields for this product.
        product_info_list.append({
            'title': title,
            'thumbnail_url': thumbnail_url,
            'price': price,
            'delivery_fee': delivery_fee,
            'categories': categories,
            'review_count': review_count,
            'purchase_count': purchase_count,
            'registration_date': registration_date,
            'zzim_count': zzim_count,
        })
        print(f"{i}번째 상품 정보 추가 완료")

    return product_info_list
|
|
|
|
|
|
|
|
def ns(keyword, timeout=10):
    """Search Naver Shopping for ``keyword`` and summarise the listed products.

    The result page is fetched with ``requests``; the product list is read
    from the JSON embedded in the ``<script id="__NEXT_DATA__">`` tag at
    ``props.pageProps.initialState.products.list``. Progress and results are
    printed.

    Args:
        keyword: Search term. It is percent-encoded before being placed in
            the query string.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        list[dict]: One dict per product with the keys ``productTitle``,
        ``price``, ``imageUrl``, ``rank``, ``purchase`` and ``review``.
        An empty list is returned when the request or the parsing fails (the
        error is printed, not raised).
    """
    from urllib.parse import quote

    # Naver Shopping search URL.
    # NOTE: the query previously contained "sort=rel×tamp=", which is
    # "&timestamp" after an HTML-entity mangling ("&times" -> "×"); the
    # intended separate "sort" and "timestamp" parameters are restored here.
    # A category filter (cat_id=<naver_code>) is currently not applied.
    url_base = "https://search.shopping.naver.com/search/all?query="
    url_end = (
        "&frm=NVSHATC&pagingIndex=1&pagingSize=40"
        "&productSet=overseas&sort=rel&timestamp=&viewType=list"
    )
    # Encode the keyword so characters such as '&', '#' or spaces cannot
    # break or truncate the query string.
    url = url_base + quote(keyword, safe="") + url_end

    print("네이버 카테코드는 [생략]] 입니다.")
    print(f"대상키워드는 [{keyword}] 입니다.")

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Language": "en-US,en;q=0.9",
        # NOTE(review): "br" is advertised here; confirm a brotli decoder is
        # installed, otherwise a brotli-compressed body may not be decoded.
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",  # Do Not Track request header
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",  # ask for an upgrade to https
        "Cache-Control": "max-age=0",  # ask not to be served cached content
    }

    products_info = []

    # Fetch the search page and pull the product list out of the embedded JSON.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        print(f"response : {response}")
        response.raise_for_status()  # raise if the request failed

        soup = BeautifulSoup(response.text, 'html.parser')
        next_data_tag = soup.find("script", {"id": "__NEXT_DATA__"})
        if next_data_tag is None or not next_data_tag.string:
            # Report the missing tag explicitly rather than via an opaque
            # "'NoneType' object has no attribute 'string'" error.
            raise ValueError("__NEXT_DATA__ script tag not found in response")

        next_data_json = json.loads(next_data_tag.string)
        products_list = next_data_json["props"]["pageProps"]["initialState"]["products"]["list"]

        for product in products_list:
            item = product.get("item") or {}
            # Keep only the fields that are actually reported.
            products_info.append({
                "productTitle": item.get("productTitle"),
                "price": item.get("price"),
                "imageUrl": item.get("imageUrl"),
                "rank": item.get("rank"),
                "purchase": item.get("purchaseCnt"),
                "review": item.get("reviewCountSum"),
            })

        print(f"키워드 검색 결과 상품 [{keyword}]에 대한 [{len(products_info)}]개의 상품정보수집 완료")

        print(f"products_info \n {products_info}")
    except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError) as e:
        # Best effort: report the failure and fall through to an empty result.
        print(f"Exception : {e}")
        return []

    return products_info
|
|
|
|
|
|
# Search keyword used when this file is executed.
keywrod = "방폭등"

# Run the requests-based scraper for the keyword.
ns(keywrod)

# Alternative: browser-based scraping through Playwright (currently disabled).
# with sync_playwright() as playwright:
#     run(playwright, keywrod)