Resell1/tests/input_market.py

134 lines
5.0 KiB
Python

import asyncio
import re
import sys
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
from supabase import create_client, Client
import getpass
# Extract smartstore URLs from an HTML file
def extract_market_urls(html_file_path):
    """Return the unique Naver smartstore URLs linked from an HTML file.

    Args:
        html_file_path: Path to a UTF-8 encoded HTML file.

    Returns:
        A list of ``href`` values of ``<a>`` tags that start with
        ``https://smartstore.naver.com``, de-duplicated and kept in the
        order in which they were first found.
    """
    with open(html_file_path, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")
    smartstore_hrefs = (
        anchor["href"]
        for anchor in soup.find_all("a", href=True)
        if anchor["href"].startswith("https://smartstore.naver.com")
    )
    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would hand the URLs back in a different order on every run because
    # string hashing is randomized per process.
    return list(dict.fromkeys(smartstore_hrefs))
# Collect market information with Playwright
def fetch_market_info(url):
    """Scrape the store name and seller grade from a smartstore page.

    A headless Chromium instance is launched for the call and closed before
    returning. Every failure (page load, name lookup, grade lookup) is printed
    and otherwise ignored, so the affected value stays an empty string.

    Args:
        url: Address of the smartstore page to open.

    Returns:
        A ``(market_name, market_grade)`` tuple of strings; either item is
        ``""`` when it could not be read.
    """
    store_name, store_grade = "", ""
    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch(headless=True)
        tab = chromium.new_page()
        try:
            # Both the navigation and the load-state wait get a 60 s timeout.
            tab.goto(url, timeout=60000)
            tab.wait_for_load_state("domcontentloaded", timeout=60000)

            # Store name: text of the span inside the store-name widget.
            try:
                name_node = tab.query_selector("div#pc-storeNameWidget span")
                if name_node:
                    store_name = name_node.inner_text().strip()
            except Exception as err:
                print(f"Error fetching market_name from {url}: {err}")

            # Seller grade: third span of the first seller-info row when it
            # exists, otherwise the whole row's text.
            try:
                grade_box = tab.query_selector(
                    "div#pc-sellerInfoWidget div > div > div > div:nth-child(1)"
                )
                if grade_box:
                    inner_spans = grade_box.query_selector_all("span")
                    text_source = (
                        inner_spans[2] if len(inner_spans) >= 3 else grade_box
                    )
                    store_grade = text_source.inner_text().strip()
            except Exception as err:
                print(f"Error fetching market_grade from {url}: {err}")
        except Exception as err:
            print(f"Error loading page {url}: {err}")
        finally:
            chromium.close()
    return store_name, store_grade
# Supabase client creation and data insertion function (with a market_url duplicate check)
def supabase_insert_markets(supabase_url: str, supabase_key: str, market_data: list):
    """Insert scraped markets into the ``markets`` table, skipping known URLs.

    Args:
        supabase_url: Supabase project URL passed to ``create_client``.
        supabase_key: Supabase API key passed to ``create_client``.
        market_data: List of ``(market_url, market_name, market_grade)``
            tuples.

    Each row is handled on its own: a failure while checking or inserting one
    URL is printed and the loop moves on to the next URL.
    """
    supabase: Client = create_client(supabase_url, supabase_key)
    for url, name, grade in market_data:
        try:
            # Duplicate check: only existence matters, so fetch a single
            # column of at most one matching row.
            existing = (
                supabase.table("markets")
                .select("market_url")
                .eq("market_url", url)
                .limit(1)
                .execute()
            )
        except Exception as e:
            print(f"Exception checking {url}: {e}")
            continue
        if existing.data:
            print(f"{url} 은(는) 이미 존재합니다. 건너뜁니다.")
            continue

        data = {
            "market_name": name,
            "market_url": url,
            "market_grade": grade,
            "market_memo": "",
        }
        try:
            # execute() returns a response object (read above via ``.data``),
            # not a dict, so failures are expected to surface as exceptions;
            # calling ``.get("error")`` on the response would itself raise
            # and misreport a successful insert.
            supabase.table("markets").insert(data).execute()
        except Exception as e:
            print(f"Exception inserting {url}: {e}")
        else:
            print(f"Inserted {url} successfully.")
def main():
    """Scrape smartstore markets listed in an HTML file and store them.

    Reads the HTML file path from ``sys.argv[1]``, collects name and grade for
    every smartstore URL found in it, prompts for Supabase credentials, signs
    in, and inserts the collected rows. Exits with status 1 when the path
    argument is missing or the sign-in fails.
    """
    if len(sys.argv) < 2:
        print("Usage: python module.py <html_file_path>")
        sys.exit(1)
    html_file_path = sys.argv[1]

    # 1. Extract market URLs from the HTML file.
    market_urls = extract_market_urls(html_file_path)
    print(f"{len(market_urls)}개의 스마트스토어 URL을 찾았습니다.")

    # 2. Collect name and grade for each URL with Playwright.
    market_data = []
    for url in market_urls:
        print(f"Processing {url} ...")
        name, grade = fetch_market_info(url)
        print(f" market_name: {name}")
        print(f" market_grade: {grade}")
        market_data.append((url, name, grade))

    # 3. Ask for Supabase credentials. The API key is a separate secret from
    #    the account password: create_client() expects the project key, so
    #    the password must not be passed in its place.
    print("Supabase 로그인 정보를 입력하세요.")
    supabase_url = input("Supabase URL: ").strip()
    supabase_key = getpass.getpass("Supabase API Key: ").strip()
    supabase_id = input("Supabase Email (ID): ").strip()
    supabase_pw = getpass.getpass("Supabase Password: ").strip()

    supabase: Client = create_client(supabase_url, supabase_key)
    try:
        auth = supabase.auth
        # Newer clients expose sign_in_with_password(); older ones only have
        # sign_in(). Either way a failed login is expected to raise rather
        # than return a dict with an "error" entry.
        if hasattr(auth, "sign_in_with_password"):
            auth.sign_in_with_password(
                {"email": supabase_id, "password": supabase_pw}
            )
        else:
            auth.sign_in(email=supabase_id, password=supabase_pw)
    except Exception as e:
        print(f"Supabase 로그인 예외: {e}")
        sys.exit(1)
    print("Supabase 로그인 성공!")

    # 4. Insert the collected rows (duplicates are skipped by the helper).
    # NOTE(review): supabase_insert_markets() builds its own client from the
    # URL and key, so the session created above is presumably not reused for
    # the inserts - confirm the table's policies allow writes with this key.
    supabase_insert_markets(supabase_url, supabase_key, market_data)
# Run the command-line workflow only when this file is executed as a script.
if __name__ == "__main__":
    main()