# AutoPercenty3/test/naver_parser.py
#
# 132 lines
# 6.1 KiB
# Python

import json
import logging
from typing import List, Dict, Optional
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
# Get the logger instance.
logger = logging.getLogger('default_logger')


class NaverParser:
    """Fetch a Naver Shopping search page and extract product data from it.

    The search page embeds its state as JSON inside a
    ``<script id="__NEXT_DATA__">`` tag. The methods below download the page,
    decode that JSON and pick the product list and related search tags out
    of it.
    """

    # Seconds to wait for the HTTP response. Without a timeout, requests.get()
    # can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.base_url = "https://search.shopping.naver.com/search/all?query="
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Language": "en-US,en;q=0.9",
            # "br" is intentionally not advertised: requests only decodes
            # Brotli when an optional brotli package is installed, and an
            # undecoded body would make the HTML unparseable.
            "Accept-Encoding": "gzip, deflate",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Cache-Control": "max-age=0",
        }

    @staticmethod
    def _to_int(value, default=0):
        """Return ``int(value)``, or ``default`` when it cannot be converted."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _item_of(product) -> Dict:
        """Return the product's ``"item"`` dict, or ``{}`` if absent/invalid."""
        if not isinstance(product, dict):
            return {}
        item = product.get("item")
        return item if isinstance(item, dict) else {}

    def fetch_search_results(self, keyword: str, product_set: str = "overseas") -> Optional[Dict]:
        """Download the search page for ``keyword`` and decode its embedded JSON.

        Args:
            keyword: Search term; it is percent-encoded before being placed
                in the URL.
            product_set: Value sent as the ``productSet`` query parameter.

        Returns:
            The decoded ``__NEXT_DATA__`` JSON, or ``None`` when the request
            fails, the script tag is missing/empty, or its content is not
            valid JSON. Every failure is logged.
        """
        # Encode user-supplied text so characters such as '&', '#', '+' or
        # spaces cannot break or truncate the query string.
        encoded_keyword = quote(keyword, safe="")
        encoded_set = quote(product_set, safe="")
        url = (
            f"{self.base_url}{encoded_keyword}"
            "&frm=NVSHATC&pagingIndex=1&pagingSize=40"
            f"&productSet={encoded_set}&sort=rel&timestamp=&viewType=list"
        )
        logger.debug(f"검색 URL: {url}")

        try:
            response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.error(f"네이버 쇼핑 HTML 가져오기 실패: {e}")
            return None

        soup = BeautifulSoup(response.text, 'html.parser')
        next_data = soup.find("script", {"id": "__NEXT_DATA__"})
        # .string is None when the tag is empty, so check it as well.
        if next_data is None or not next_data.string:
            logger.error("검색 결과에서 '__NEXT_DATA__' 태그를 찾을 수 없음.")
            return None

        try:
            return json.loads(next_data.string)
        except ValueError as e:  # json.JSONDecodeError subclasses ValueError
            logger.error(f"'__NEXT_DATA__' JSON 파싱 실패: {e}")
            return None

    def get_product_list(self, data: Dict) -> List[Dict]:
        """Extract the product list from decoded search-page JSON.

        Reads ``data["props"]["pageProps"]["initialState"]["products"]["list"]``.

        Returns:
            The product list, or ``[]`` (after logging) when any level of
            that path is missing, is not a mapping, or the value is not a list.
        """
        try:
            products_list = data["props"]["pageProps"]["initialState"]["products"]["list"]
        except (KeyError, TypeError) as e:
            # TypeError covers a level that is None or otherwise not subscriptable.
            logger.error(f"제품 리스트를 추출하는 중 오류 발생: {e}")
            return []
        if not isinstance(products_list, list):
            logger.error(f"제품 리스트를 추출하는 중 오류 발생: {type(products_list).__name__}")
            return []
        logger.debug(f"{len(products_list)}개의 제품이 검색됨.")
        return products_list

    def filter_products_by_price(self, products: List[Dict], min_price: int = 50000) -> List[Dict]:
        """Keep the products whose price is at least ``min_price``.

        A missing or non-numeric price is treated as 0.

        Args:
            products: Raw product entries, each holding its data under ``"item"``.
            min_price: Inclusive lower bound on ``item["price"]``.

        Returns:
            The matching entries, in their original order.
        """
        filtered_products = [
            product
            for product in products
            if self._to_int(self._item_of(product).get("price")) >= min_price
        ]
        logger.debug(f"가격이 {min_price}원 이상인 제품 {len(filtered_products)}개 필터링됨.")
        return filtered_products

    def extract_product_info(self, product: Dict) -> Dict:
        """Pick the fields of interest out of one raw product entry.

        Args:
            product: Raw entry holding its data under ``"item"``.

        Returns:
            A flat dict with snake_case keys. A field that is absent from
            the item is ``None``; ``"category"`` is always a 4-element list.
        """
        item = self._item_of(product)
        product_info = {
            "title": item.get("productTitle"),
            "price": item.get("price"),
            "mall_name": item.get("mallName"),
            "image_url": item.get("imageUrl"),
            "product_url": item.get("mallProductUrl"),
            "category": [
                item.get("category1Name"),
                item.get("category2Name"),
                item.get("category3Name"),
                item.get("category4Name"),
            ],
            "rank": item.get("rank"),
            "review_count": item.get("reviewCount"),
            "review_count_sum": item.get("reviewCountSum"),
            "score_info": item.get("scoreInfo"),
            "mobile_low_price": item.get("mobileLowPrice"),
            "low_price": item.get("lowPrice"),
            "delivery_fee_content": item.get("deliveryFeeContent"),
            "dlvry_low_price": item.get("dlvryLowPrice"),
            "open_date": item.get("openDate"),
            "mall_count": item.get("mallCount"),
            "keep_count": item.get("keepCnt"),
            "oversea_tp": item.get("overseaTp"),
            "purchase_count": item.get("purchaseCnt"),
            "manu_tag": item.get("manuTag"),
            "img_size": item.get("imgSz"),
            "search_keyword": item.get("searchKeyword"),
            "mall_pc_url": item.get("mallPcUrl"),
        }
        return product_info

    def get_top_n_products(self, products: List[Dict], top_n: int = 5) -> List[Dict]:
        """Return extracted info for the ``top_n`` best-ranked products.

        Products are ordered by ascending ``item["rank"]``. Entries whose
        rank is missing or non-numeric are placed after all ranked ones, so
        they cannot displace a genuinely ranked product.

        Args:
            products: Raw product entries.
            top_n: Maximum number of products to return; values below 0
                are treated as 0.

        Returns:
            A list of dicts as produced by ``extract_product_info``.
        """
        def rank_key(product):
            rank = self._to_int(self._item_of(product).get("rank"), default=None)
            # (False, n) sorts before (True, 0): ranked first, unranked last.
            return (rank is None, rank or 0)

        sorted_products = sorted(products, key=rank_key)
        top_products = sorted_products[:max(top_n, 0)]
        logger.debug(f"상위 {top_n}개 제품을 추출함.")
        return [self.extract_product_info(product) for product in top_products]

    def get_related_tags(self, data: Dict) -> List[str]:
        """Extract the related search tags from decoded search-page JSON.

        Reads ``data["props"]["pageProps"]["relatedTags"]``.

        Returns:
            The tags with surrounding whitespace removed; non-string and
            blank entries are skipped. ``[]`` (after logging) when the path
            is missing or does not hold a list.
        """
        try:
            related_tags = data["props"]["pageProps"]["relatedTags"]
        except (KeyError, TypeError):
            logger.error("연관 검색어를 추출하는 중 오류 발생.")
            return []
        if not isinstance(related_tags, (list, tuple)):
            logger.error("연관 검색어를 추출하는 중 오류 발생.")
            return []
        # Strip first, then filter, so whitespace-only tags are dropped too.
        stripped = (tag.strip() for tag in related_tags if isinstance(tag, str))
        filtered_tags = [tag for tag in stripped if tag]
        logger.debug(f"연관 검색어: {filtered_tags}")
        return filtered_tags

    def search_and_parse(self, keyword: str, min_price: int = 10000, top_n: int = 5,
                         product_set: str = "overseas") -> Dict:
        """Search for ``keyword`` and return the top products and related tags.

        Args:
            keyword: Search term.
            min_price: Inclusive minimum price used to filter products.
            top_n: Maximum number of products to return.
            product_set: Forwarded to ``fetch_search_results``.

        Returns:
            ``{"top_products": [...], "related_tags": [...]}``, or ``{}``
            when the search page could not be fetched or decoded.
        """
        data = self.fetch_search_results(keyword, product_set)
        if not data:
            return {}
        products = self.get_product_list(data)
        filtered_products = self.filter_products_by_price(products, min_price)
        return {
            "top_products": self.get_top_n_products(filtered_products, top_n),
            "related_tags": self.get_related_tags(data),
        }
# Usage example: run one search and pretty-print the parsed result.
if __name__ == "__main__":
    search_term = "순간접착제"
    naver = NaverParser()
    parsed = naver.search_and_parse(search_term)
    # ensure_ascii=False prints non-ASCII text as-is instead of \u escapes.
    rendered = json.dumps(parsed, ensure_ascii=False, indent=4)
    print("검색 결과:", rendered)