"""Collect Naver SmartStore market info from a saved HTML file into Supabase.

Pipeline: extract SmartStore links from an HTML file, open each one with
Playwright to read the store name and seller grade, then insert the rows
into the Supabase ``markets`` table, skipping URLs that are already stored.
"""

import asyncio
import getpass
import re
import sys
from typing import Optional

from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
from supabase import Client, create_client

# Only links starting with this prefix are treated as market URLs.
SMARTSTORE_PREFIX = "https://smartstore.naver.com"

# Timeout (milliseconds) used for both navigation and the load-state wait.
PAGE_TIMEOUT_MS = 60000


def extract_market_urls(html_file_path):
    """Return the unique SmartStore URLs linked from an HTML file.

    Args:
        html_file_path: Path to a UTF-8 encoded HTML file.

    Returns:
        A list of ``href`` values that start with ``SMARTSTORE_PREFIX``,
        de-duplicated and kept in first-seen document order.
    """
    with open(html_file_path, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")

    hrefs = (link["href"] for link in soup.find_all("a", href=True))
    # dict.fromkeys removes duplicates while keeping a stable order, unlike
    # set(), whose iteration order can differ between runs.
    return list(
        dict.fromkeys(h for h in hrefs if h.startswith(SMARTSTORE_PREFIX))
    )


def fetch_market_info(url):
    """Open a market page in headless Chromium and scrape name and grade.

    Scraping is best-effort: every failure is printed and the affected
    field keeps its default empty string, so this function does not raise
    for page-level errors.

    Args:
        url: Market page URL to load.

    Returns:
        A ``(market_name, market_grade)`` tuple of strings; either may be
        ``""`` when the element was not found or an error occurred.
    """
    # Defaults returned when a field cannot be extracted.
    market_name = ""
    market_grade = ""

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            # Created inside the try block so the browser is still closed
            # if opening the page itself fails.
            page = browser.new_page()
            page.goto(url, timeout=PAGE_TIMEOUT_MS)
            page.wait_for_load_state("domcontentloaded", timeout=PAGE_TIMEOUT_MS)

            # Extract market_name.
            try:
                name_elem = page.query_selector("div#pc-storeNameWidget span")
                if name_elem:
                    market_name = name_elem.inner_text().strip()
            except Exception as e:
                print(f"Error fetching market_name from {url}: {e}")

            # Extract market_grade.
            try:
                grade_elem = page.query_selector(
                    "div#pc-sellerInfoWidget div > div > div > div:nth-child(1)"
                )
                if grade_elem:
                    spans = grade_elem.query_selector_all("span")
                    if len(spans) >= 3:
                        # With three or more spans, the third one is used.
                        market_grade = spans[2].inner_text().strip()
                    else:
                        # Otherwise fall back to the container's full text.
                        market_grade = grade_elem.inner_text().strip()
            except Exception as e:
                print(f"Error fetching market_grade from {url}: {e}")
        except Exception as e:
            print(f"Error loading page {url}: {e}")
        finally:
            browser.close()

    return market_name, market_grade


def _response_error(response):
    """Return the error message carried by a Supabase response, or None.

    Handles both dict-shaped responses and objects exposing an ``error``
    attribute, so callers never invoke dict-only ``.get`` on an object
    (which would raise ``AttributeError`` and mask a successful request).
    """
    if isinstance(response, dict):
        error = response.get("error")
    else:
        error = getattr(response, "error", None)
    if not error:
        return None
    if isinstance(error, dict):
        return error.get("message", str(error))
    return getattr(error, "message", None) or str(error)


def _sign_in(client, email, password):
    """Sign in with email/password using whichever method the client has.

    NOTE(review): prefers ``sign_in_with_password`` and falls back to the
    legacy ``sign_in(email=..., password=...)`` call this script originally
    used - confirm against the installed supabase-py version.
    """
    auth = client.auth
    if hasattr(auth, "sign_in_with_password"):
        return auth.sign_in_with_password({"email": email, "password": password})
    return auth.sign_in(email=email, password=password)


def supabase_insert_markets(
    supabase_url: str,
    supabase_key: str,
    market_data: list,
    client: Optional[Client] = None,
):
    """Insert scraped markets into the ``markets`` table, skipping duplicates.

    Failures are printed per URL and processing continues with the next
    entry, so a single bad row does not abort the batch.

    Args:
        supabase_url: Supabase project URL; used only when ``client`` is None.
        supabase_key: Supabase API key; used only when ``client`` is None.
        market_data: List of ``(market_url, market_name, market_grade)``
            tuples.
        client: Optional existing client (for example one that is already
            signed in). When omitted, a new client is created from
            ``supabase_url`` and ``supabase_key``.
    """
    supabase: Client = (
        client if client is not None else create_client(supabase_url, supabase_key)
    )

    for url, name, grade in market_data:
        # Duplicate check: skip when a row with this market_url exists.
        try:
            existing = (
                supabase.table("markets").select("*").eq("market_url", url).execute()
            )
        except Exception as e:
            print(f"Exception checking {url}: {e}")
            continue

        if existing.data:
            print(f"{url} 은(는) 이미 존재합니다. 건너뜁니다.")
            continue

        data = {
            "market_name": name,
            "market_url": url,
            "market_grade": grade,
            "market_memo": "",
        }
        try:
            response = supabase.table("markets").insert(data).execute()
        except Exception as e:
            print(f"Exception inserting {url}: {e}")
            continue

        error_message = _response_error(response)
        if error_message:
            print(f"Failed to insert {url}: {error_message}")
        else:
            print(f"Inserted {url} successfully.")


def main():
    """Run the pipeline: extract URLs, scrape each market, store in Supabase.

    Expects the HTML file path as the first command-line argument and
    prompts interactively for the Supabase URL, API key, email and
    password. Exits with status 1 on missing arguments or failed login.
    """
    if len(sys.argv) < 2:
        print("Usage: python module.py <html_file_path>")
        sys.exit(1)

    html_file_path = sys.argv[1]

    # 1. Extract market URLs from the HTML file.
    market_urls = extract_market_urls(html_file_path)
    print(f"총 {len(market_urls)}개의 스마트스토어 URL을 찾았습니다.")

    # 2. Collect information for each URL with Playwright.
    market_data = []
    for url in market_urls:
        print(f"Processing {url} ...")
        name, grade = fetch_market_info(url)
        print(f"  market_name: {name}")
        print(f"  market_grade: {grade}")
        market_data.append((url, name, grade))

    # 3. Ask for Supabase credentials.
    print("Supabase 로그인 정보를 입력하세요.")
    supabase_url = input("Supabase URL: ").strip()
    # The API key is requested separately: the client must be created with
    # the project's API key, not with the user's login password.
    supabase_key = getpass.getpass("Supabase API Key: ").strip()
    supabase_id = input("Supabase Email (ID): ").strip()
    supabase_pw = getpass.getpass("Supabase Password: ").strip()

    # Create the client and sign in (the exact authentication method may
    # differ depending on the environment).
    try:
        supabase: Client = create_client(supabase_url, supabase_key)
        auth_response = _sign_in(supabase, supabase_id, supabase_pw)
        error_message = _response_error(auth_response)
    except Exception as e:
        print(f"Supabase 로그인 예외: {e}")
        sys.exit(1)

    if error_message:
        print(f"Supabase 로그인 실패: {error_message}")
        sys.exit(1)
    print("Supabase 로그인 성공!")

    # 4. Insert the collected data (with duplicate check), reusing the
    # signed-in client so the inserts run under the authenticated session.
    supabase_insert_markets(supabase_url, supabase_key, market_data, client=supabase)


if __name__ == "__main__":
    main()