AutoPercenty/edit/ns.py

188 lines
8.8 KiB
Python

import json
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
def run(playwright, keyword):
    """Open the Naver Shopping search page for *keyword* in Chromium and print the scraped products.

    Args:
        playwright: A started Playwright instance (as yielded by ``sync_playwright()``).
        keyword: Search term; it is percent-encoded before being placed in the URL.

    The rendered HTML is printed, parsed with BeautifulSoup, passed to
    ``extract_product_info`` and each resulting product dict is printed.
    The browser is closed even if one of the steps raises.
    """
    browser = playwright.chromium.launch(headless=False)  # headed (non-headless) mode
    try:
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
            locale="en-US",  # browser language
            timezone_id="America/New_York",  # browser time zone
        )
        page = context.new_page()

        # Hide navigator.webdriver. This has to be an init script: evaluating an
        # assignment on the initial blank page is discarded as soon as goto()
        # loads a new document, so it never reached the target page.
        page.add_init_script(
            script="Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        )
        print("브라우저 열기 및 사용자 에이전트 설정 완료")

        # Encode the keyword so characters such as '&', '#' or spaces cannot
        # corrupt the query string.
        encoded_keyword = quote(keyword, safe="")
        page.goto(f"https://search.shopping.naver.com/search/all?query={encoded_keyword}&bt=-1&frm=NVSCPRO")
        print("페이지 로딩 완료")

        # Short pause before reading the page.
        page.wait_for_timeout(1000)
        print("페이지 로딩 완료")

        # Grab the page HTML.
        html = page.content()
        print(html)

        soup = BeautifulSoup(html, 'html.parser')
        products = extract_product_info(soup)
        for product in products:
            # extract_product_info returns plain dicts, which have no `.text`
            # attribute; print the dict itself.
            print(product)
    finally:
        browser.close()
def _find_text(parent, name, css_class):
    """Return the stripped text of the first `name` tag with `css_class` under `parent`, or None."""
    if parent is None:
        return None
    node = parent.find(name, class_=css_class)
    return node.text.strip() if node is not None else None


def _find_labelled(container, name, label):
    """Return the first `name` tag under `container` whose text contains `label`, or None."""
    if container is None:
        return None
    for node in container.find_all(name):
        if label in node.get_text():
            return node
    return None


def _em_text(node, default='0'):
    """Return the text of the first <em> inside `node`, or `default` when either is missing."""
    if node is None:
        return default
    em = node.find('em')
    return em.text if em is not None else default


def extract_product_info(soup):
    """Collect per-product details from a parsed Naver Shopping result page.

    Args:
        soup: BeautifulSoup document (or tag) to search.

    Returns:
        A list with one dict per ``div.basicList_info_area__17Xyo`` element,
        holding the keys ``title``, ``thumbnail_url``, ``price``,
        ``delivery_fee``, ``categories``, ``review_count``,
        ``purchase_count``, ``registration_date`` and ``zzim_count``.
        Missing pieces yield ``None`` (or ``'0'`` for the counters) instead of
        raising.
    """
    # NOTE(review): the CSS class names look build-generated and may change
    # between site deployments - confirm against the live markup.
    product_info_list = []

    products = soup.find_all('div', class_='basicList_info_area__17Xyo')
    print(f"products : {products}")

    for i, product in enumerate(products, 1):
        # Product name (title attribute of the product link).
        link = product.find('a', class_='product_link__TrAac')
        title = link.get('title') if link is not None else None
        print(f"{i}번째 상품 title : {title}")

        # Thumbnail URL.
        thumbnail_element = product.find('a', class_='thumbnail_thumb__Bxb6Z')
        image = thumbnail_element.find('img') if thumbnail_element is not None else None
        thumbnail_url = image.get('src') if image is not None else None
        print(f"{i}번째 상품 thumbnail_url : {thumbnail_url}")

        # Price and delivery fee.
        price_area = product.find('div', class_='product_price_area__eTg7I')
        price = _find_text(price_area, 'span', 'price_num__S2p_v')
        print(f"{i}번째 상품 price : {price}")
        delivery_fee = _find_text(price_area, 'span', 'price_delivery__yw_We')
        print(f"{i}번째 상품 delivery_fee : {delivery_fee}")

        # Categories.
        categories = [cat.text for cat in product.find_all('span', class_='product_category__l4FWz')]
        print(f"{i}번째 상품 categories : {categories}")

        # Reviews, purchases, registration date, wish-list count.
        # A `text=`/`string=` filter only matches a tag whose `.string` equals
        # the label exactly, which cannot be true for a tag that also holds
        # the value (e.g. label plus <em>), so match on contained text instead.
        etc_info = product.find('div', class_='product_etc_box__ElfVA')

        review_count = _em_text(_find_labelled(etc_info, 'a', '리뷰'))
        print(f"{i}번째 상품 review_count : {review_count}")

        purchase_count = _em_text(_find_labelled(etc_info, 'span', '구매건수'))
        print(f"{i}번째 상품 purchase_count : {purchase_count}")

        date_node = _find_labelled(etc_info, 'span', '등록일')
        registration_date = (
            date_node.text.replace('등록일 ', '').strip() if date_node is not None else None
        )
        print(f"{i}번째 상품 registration_date : {registration_date}")

        zzim_node = (
            etc_info.find('a', class_='product_btn_zzim__MQ17u') if etc_info is not None else None
        )
        zzim_count = _em_text(zzim_node)
        print(f"{i}번째 상품 zzim_count : {zzim_count}")

        # Store everything in a dict.
        product_info_list.append({
            'title': title,
            'thumbnail_url': thumbnail_url,
            'price': price,
            'delivery_fee': delivery_fee,
            'categories': categories,
            'review_count': review_count,
            'purchase_count': purchase_count,
            'registration_date': registration_date,
            'zzim_count': zzim_count,
        })
        print(f"{i}번째 상품 정보 추가 완료")

    return product_info_list
def _collect_products(html):
    """Parse the ``__NEXT_DATA__`` JSON embedded in *html* and return summarised product dicts.

    Raises:
        ValueError: if the ``__NEXT_DATA__`` script tag is missing or empty,
            or its content is not valid JSON.
        KeyError / TypeError: if the JSON does not have the expected nesting.
    """
    soup = BeautifulSoup(html, 'html.parser')
    script = soup.find("script", {"id": "__NEXT_DATA__"})
    if script is None or not script.string:
        raise ValueError("__NEXT_DATA__ script tag not found in response")

    next_data = json.loads(script.string)
    products_list = next_data["props"]["pageProps"]["initialState"]["products"]["list"]

    products_info = []
    for product in products_list:
        # `or {}` also covers an explicit null "item", not only a missing key.
        item = product.get("item") or {}
        # Only the fields that are actually reported are read; the many other
        # item keys that used to be fetched into unused locals were dropped.
        products_info.append({
            "productTitle": item.get("productTitle"),
            "price": item.get("price"),
            "imageUrl": item.get("imageUrl"),
            "rank": item.get("rank"),
            "purchase": item.get("purchaseCnt"),
            "review": item.get("reviewCountSum"),
        })
    return products_info


def ns(keyword):
    """Fetch Naver Shopping search results for *keyword* over plain HTTP and print a summary.

    Args:
        keyword: Search term; it is percent-encoded before being placed in the URL.

    Returns:
        A list of dicts with the keys ``productTitle``, ``price``,
        ``imageUrl``, ``rank``, ``purchase`` and ``review``. On a request or
        parsing failure the error is printed and an empty list is returned.
    """
    # Build the Naver Shopping search URL.
    url_base = "https://search.shopping.naver.com/search/all?query="
    url_end = "&frm=NVSHATC&pagingIndex=1&pagingSize=40&&productSet=overseas&sort=rel&timestamp=&viewType=list"
    # Encode the keyword so characters such as '&', '#' or spaces cannot
    # corrupt the query string.
    url = url_base + quote(keyword, safe="") + url_end

    print("네이버 카테코드는 [생략]] 입니다.")
    print(f"대상키워드는 [{keyword}] 입니다.")

    # No explicit Accept-Encoding: advertising "br" makes the server free to
    # answer with brotli, which requests can only decode when a brotli package
    # is installed. Leaving the header out lets requests advertise exactly
    # what it is able to decode.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Language": "en-US,en;q=0.9",
        "DNT": "1",  # Do Not Track request header
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",  # ask for an upgrade to https
        "Cache-Control": "max-age=0",  # ask not to be served cached content
    }

    # Fetch the search page and pull the product list out of it.
    try:
        # requests has no default timeout; without one a stalled connection
        # would block forever.
        response = requests.get(url, headers=headers, timeout=10)
        print(f"response : {response}")
        response.raise_for_status()  # raise on an HTTP error status
        products_info = _collect_products(response.text)
    except (requests.RequestException, AttributeError, KeyError, TypeError, ValueError) as e:
        # Best-effort scraper: report the problem instead of crashing.
        print(f"Exception : {e}")
        return []

    print(f"키워드 검색 결과 상품 [{keyword}]에 대한 [{len(products_info)}]개의 상품정보수집 완료")
    print(f"products_info \n {products_info}")
    return products_info
if __name__ == "__main__":
    # Guarded so that importing this module does not fire a live request;
    # running the file as a script behaves as before.
    keyword = "방폭등"
    ns(keyword)
    # Browser-based alternative (Playwright) to the plain HTTP request above:
    # with sync_playwright() as playwright:
    #     run(playwright, keyword)