import json
import logging
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

# Fetch the logger instance used by this module.
logger = logging.getLogger('default_logger')


def _to_int(value: Any, default: int = 0) -> int:
    """Return *value* converted with ``int()``, or *default* if that fails."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def _item_of(product: Any) -> Dict:
    """Return the ``"item"`` dict of a product entry, or ``{}`` if absent."""
    item = product.get("item") if isinstance(product, dict) else None
    return item if isinstance(item, dict) else {}


class NaverParser:
    """Fetch a Naver Shopping search page and extract product data from it.

    The page is parsed for a ``<script id="__NEXT_DATA__">`` tag whose
    content is decoded as JSON; the remaining methods read product lists and
    related tags out of that decoded structure.
    """

    # Class-level fallback so subclasses that skip ``__init__`` still work.
    timeout: float = 10.0

    def __init__(self, timeout: float = 10.0):
        """Set up the base search URL, request headers and HTTP timeout.

        Args:
            timeout: Seconds passed to ``requests.get`` as its ``timeout``.
        """
        self.base_url = "https://search.shopping.naver.com/search/all?query="
        self.timeout = timeout
        # NOTE(review): "br" is advertised in Accept-Encoding; presumably the
        # runtime has a Brotli decoder installed for requests/urllib3 --
        # confirm, otherwise a Brotli-compressed body cannot be decoded.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Cache-Control": "max-age=0",
        }

    def fetch_search_results(self, keyword: str, product_set: str = "overseas") -> Optional[Dict]:
        """Fetch the search page for *keyword* and return its embedded JSON.

        Args:
            keyword: Search term; it is percent-encoded before being placed
                in the query string.
            product_set: Value for the ``productSet`` query parameter.

        Returns:
            The decoded ``__NEXT_DATA__`` JSON, or ``None`` when the request
            fails, the tag is missing, or its content is not valid JSON.
        """
        # Percent-encode user-supplied values so characters such as "&", "#"
        # or spaces cannot corrupt the query string.
        url = (
            f"{self.base_url}{quote(keyword, safe='')}"
            "&frm=NVSHATC&pagingIndex=1&pagingSize=40"
            f"&productSet={quote(product_set, safe='')}"
            # The original read "sort=rel×tamp=": "&times" had been turned
            # into the "×" character, destroying both parameters.
            "&sort=rel&timestamp=&viewType=list"
        )
        logger.debug("검색 URL: %s", url)

        try:
            # A timeout keeps a stalled connection from blocking forever.
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.error("네이버 쇼핑 HTML 가져오기 실패: %s", e)
            return None

        soup = BeautifulSoup(response.text, 'html.parser')
        next_data = soup.find("script", {"id": "__NEXT_DATA__"})
        if next_data is None:
            logger.error("검색 결과에서 '__NEXT_DATA__' 태그를 찾을 수 없음.")
            return None

        try:
            # ``.string`` may be None (TypeError) or hold invalid JSON
            # (json.JSONDecodeError, a ValueError subclass).
            return json.loads(next_data.string)
        except (TypeError, ValueError) as e:
            logger.error("'__NEXT_DATA__' JSON 파싱 실패: %s", e)
            return None

    def get_product_list(self, data: Dict) -> List[Dict]:
        """Extract the product list from the decoded search result.

        Args:
            data: Decoded ``__NEXT_DATA__`` JSON.

        Returns:
            ``data["props"]["pageProps"]["initialState"]["products"]["list"]``,
            or ``[]`` when that path is missing or not traversable.
        """
        try:
            products_list = data["props"]["pageProps"]["initialState"]["products"]["list"]
            logger.debug("총 %d개의 제품이 검색됨.", len(products_list))
        except (KeyError, TypeError) as e:
            # TypeError covers a None/non-dict node anywhere along the path.
            logger.error("제품 리스트를 추출하는 중 오류 발생: %s", e)
            return []
        return products_list

    def filter_products_by_price(self, products: List[Dict], min_price: int = 50000) -> List[Dict]:
        """Keep only products whose price is at least *min_price*.

        A missing or non-numeric price is treated as 0.

        Args:
            products: Product entries, each expected to carry an ``"item"`` dict.
            min_price: Inclusive lower bound on ``item["price"]``.

        Returns:
            The matching product entries, in their original order.
        """
        filtered_products = [
            product
            for product in products
            if _to_int(_item_of(product).get("price")) >= min_price
        ]
        logger.debug("가격이 %s원 이상인 제품 %d개 필터링됨.", min_price, len(filtered_products))
        return filtered_products

    def extract_product_info(self, product: Dict) -> Dict:
        """Pick the fields of interest out of one product entry.

        Args:
            product: Product entry carrying an ``"item"`` dict.

        Returns:
            A flat dict of selected ``item`` fields; fields absent from the
            entry are ``None``.
        """
        item = _item_of(product)
        product_info = {
            "title": item.get("productTitle"),
            "price": item.get("price"),
            "mall_name": item.get("mallName"),
            "image_url": item.get("imageUrl"),
            "product_url": item.get("mallProductUrl"),
            "category": [
                item.get("category1Name"),
                item.get("category2Name"),
                item.get("category3Name"),
                item.get("category4Name"),
            ],
            "rank": item.get("rank"),
            "review_count": item.get("reviewCount"),
            "review_count_sum": item.get("reviewCountSum"),
            "score_info": item.get("scoreInfo"),
            "mobile_low_price": item.get("mobileLowPrice"),
            "low_price": item.get("lowPrice"),
            "delivery_fee_content": item.get("deliveryFeeContent"),
            "dlvry_low_price": item.get("dlvryLowPrice"),
            "open_date": item.get("openDate"),
            "mall_count": item.get("mallCount"),
            "keep_count": item.get("keepCnt"),
            "oversea_tp": item.get("overseaTp"),
            "purchase_count": item.get("purchaseCnt"),
            "manu_tag": item.get("manuTag"),
            "img_size": item.get("imgSz"),
            "search_keyword": item.get("searchKeyword"),
            "mall_pc_url": item.get("mallPcUrl"),
        }
        return product_info

    def get_top_n_products(self, products: List[Dict], top_n: int = 5) -> List[Dict]:
        """Return extracted info for the *top_n* products with the lowest rank.

        Products are sorted ascending by ``item["rank"]``; a missing or
        non-numeric rank sorts as 0.

        Args:
            products: Product entries to rank.
            top_n: Maximum number of products to return; values below 1
                yield an empty list.

        Returns:
            A list of dicts produced by :meth:`extract_product_info`.
        """
        sorted_products = sorted(products, key=lambda p: _to_int(_item_of(p).get("rank")))
        # Clamp so a negative top_n cannot slice from the end of the list.
        top_products = sorted_products[:max(top_n, 0)]
        logger.debug("상위 %s개 제품을 추출함.", top_n)
        return [self.extract_product_info(product) for product in top_products]

    def get_related_tags(self, data: Dict) -> List[str]:
        """Extract the related search tags from the decoded search result.

        Args:
            data: Decoded ``__NEXT_DATA__`` JSON.

        Returns:
            The stripped, non-empty string entries of
            ``data["props"]["pageProps"]["relatedTags"]``, or ``[]`` when
            that path is missing or is not a list.
        """
        try:
            related_tags = data["props"]["pageProps"]["relatedTags"]
        except (KeyError, TypeError):
            logger.error("연관 검색어를 추출하는 중 오류 발생.")
            return []
        if not isinstance(related_tags, (list, tuple)):
            # e.g. null in the JSON; iterating it would raise.
            logger.error("연관 검색어를 추출하는 중 오류 발생.")
            return []

        # Skip non-string entries and tags that are empty after stripping.
        filtered_tags = [
            tag.strip()
            for tag in related_tags
            if isinstance(tag, str) and tag.strip()
        ]
        logger.debug("연관 검색어: %s", filtered_tags)
        return filtered_tags

    def search_and_parse(
        self,
        keyword: str,
        min_price: int = 10000,
        top_n: int = 5,
        product_set: str = "overseas",
    ) -> Dict:
        """Search for *keyword* and return the top products and related tags.

        Args:
            keyword: Search term.
            min_price: Inclusive minimum price used to filter products.
            top_n: Maximum number of products to return.
            product_set: Forwarded to :meth:`fetch_search_results`.

        Returns:
            ``{"top_products": [...], "related_tags": [...]}``, or ``{}``
            when no search data could be fetched.
        """
        data = self.fetch_search_results(keyword, product_set)
        if not data:
            return {}

        products = self.get_product_list(data)
        filtered_products = self.filter_products_by_price(products, min_price)
        return {
            "top_products": self.get_top_n_products(filtered_products, top_n),
            "related_tags": self.get_related_tags(data),
        }


def main() -> None:
    """Run a sample search and print the result as indented JSON."""
    parser = NaverParser()
    keyword = "순간접착제"
    result = parser.search_and_parse(keyword)
    print("검색 결과:", json.dumps(result, ensure_ascii=False, indent=4))


# Usage example.
if __name__ == "__main__":
    main()