InitialCommit
This commit is contained in:
commit
fccf48eb92
|
|
@ -0,0 +1,5 @@
|
|||
Lib/
|
||||
Include/
|
||||
Scripts/
|
||||
pyvenv.cfg
|
||||
*.log
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
import sys
|
||||
import logging
|
||||
from PySide6.QtWidgets import QApplication
|
||||
from src.gui import TaobaoScraperApp
|
||||
from src.databaseManager import DatabaseManager
|
||||
|
||||
# Configure root logging once, before any window or worker is created, so the
# module-level loggers of the imported modules inherit this format and level.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

if __name__ == "__main__":
    app = QApplication(sys.argv)

    # Create the database manager instance shared by the GUI and its worker.
    db_manager = DatabaseManager()
    try:
        window = TaobaoScraperApp(db_manager)
        window.show()
        exit_code = app.exec()
    finally:
        # Release the SQLite connection even if window construction or the
        # event loop raises; previously the connection was never closed.
        db_manager.close()

    sys.exit(exit_code)
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
import sqlite3
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)


class DatabaseManager:
    """Initialise and manage the SQLite database that stores scraped items.

    A single connection and cursor are opened in ``__init__`` and reused by
    every method.  The connection is opened with ``check_same_thread=False``
    and every access is serialised through a lock, so the instance can be
    created on one thread and used from another.
    """

    def __init__(self, db_name="taobao_items.db"):
        """Open (or create) the database file and ensure the table exists.

        Args:
            db_name: Path passed to ``sqlite3.connect``; ``":memory:"`` works.
        """
        # Function-scope import keeps this block self-contained without
        # touching the module's import section.
        import threading

        # Re-entrant so a method holding the lock may call another method.
        self._lock = threading.RLock()
        # sqlite3 refuses cross-thread use by default; allow it here and rely
        # on ``self._lock`` to serialise access instead.
        self.conn = sqlite3.connect(db_name, check_same_thread=False)
        self.cur = self.conn.cursor()
        self.create_table()

    def create_table(self):
        """Create the ``items`` table if it does not exist yet.

        SQLite errors are logged, not raised.
        """
        with self._lock:
            try:
                self.cur.execute("""
                    CREATE TABLE IF NOT EXISTS items (
                        item_id TEXT PRIMARY KEY,
                        pc_url TEXT,
                        name TEXT,
                        price REAL,
                        image_url TEXT,
                        sales TEXT
                    )
                """)
                self.conn.commit()
                logger.info("데이터베이스 테이블 초기화 완료")
            except sqlite3.Error as e:
                logger.error(f"DB 테이블 생성 오류: {e}")

    def insert_items(self, items_data):
        """Insert or replace a batch of item rows.

        Args:
            items_data: Iterable of 6-tuples
                ``(item_id, pc_url, name, price, image_url, sales)``.

        Returns:
            ``True`` when the batch was committed, ``False`` when SQLite
            reported an error (the error is logged and the partial batch is
            rolled back).
        """
        with self._lock:
            try:
                self.cur.executemany("""
                    INSERT OR REPLACE INTO items (item_id, pc_url, name, price, image_url, sales)
                    VALUES (?, ?, ?, ?, ?, ?)
                """, items_data)
                self.conn.commit()
                logger.info("데이터 삽입 완료")
                return True
            except sqlite3.Error as e:
                logger.error(f"DB 삽입 오류: {e}")
                self._rollback_quietly()
                return False

    def fetch_all_items(self):
        """Return every row of ``items`` as a list of tuples.

        Returns an empty list when SQLite reports an error (which is logged).
        """
        with self._lock:
            try:
                self.cur.execute("SELECT * FROM items")
                return self.cur.fetchall()
            except sqlite3.Error as e:
                logger.error(f"DB 조회 오류: {e}")
                return []

    def close(self):
        """Close the underlying connection."""
        with self._lock:
            self.conn.close()

    def _rollback_quietly(self):
        """Undo a failed batch so rows written before the error are not kept."""
        try:
            self.conn.rollback()
        except sqlite3.Error:
            # Deliberate best effort: the original failure is already logged,
            # and rollback itself fails when the connection is closed.
            pass
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
import logging
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)


class ExcelExporter:
    """Write every row returned by the database manager to an Excel file."""

    def __init__(self, db_manager):
        """Remember the manager whose ``fetch_all_items()`` supplies the rows."""
        self.db_manager = db_manager

    def export_to_excel(self, file_name="taobao_items.xlsx"):
        """Export all stored rows to ``file_name``.

        Returns:
            ``True`` when the workbook was written, ``False`` when any step
            (fetching, building the frame, writing) raised; the error is
            logged rather than propagated.
        """
        column_names = ["item_id", "pc_url", "name", "price", "image_url", "sales"]
        try:
            rows = self.db_manager.fetch_all_items()
            frame = pd.DataFrame(rows, columns=column_names)
            frame.to_excel(file_name, index=False)
        except Exception as e:
            logger.error(f"엑셀 저장 오류: {e}")
            return False
        else:
            logger.info("엑셀 파일로 저장 완료")
            return True
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
from PySide6.QtWidgets import QWidget, QVBoxLayout, QPushButton, QLabel, QMessageBox
|
||||
from PySide6.QtCore import Slot
|
||||
import logging
|
||||
from playwright_thread import PlaywrightThread
|
||||
from excel_export import ExcelExporter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class TaobaoScraperApp(QWidget):
    """Main window: buttons to start scraping, collect, export to Excel, close.

    Scraping is delegated to a ``PlaywrightThread`` and Excel export to an
    ``ExcelExporter``; both receive the ``db_manager`` given to the window.
    """

    def __init__(self, db_manager):
        """Build the widgets and wire the buttons and worker signal.

        Args:
            db_manager: Database manager handed on to the worker thread and
                the Excel exporter.
        """
        super().__init__()
        self.db_manager = db_manager
        self.setWindowTitle("Taobao Scraper")

        self.start_button = QPushButton("시작")
        self.start_button.clicked.connect(self.start_scraping)
        self.collect_button = QPushButton("수집")
        self.collect_button.clicked.connect(self.collect_data)
        self.excel_button = QPushButton("엑셀출력")
        self.excel_button.clicked.connect(self.export_to_excel)
        self.close_button = QPushButton("닫기")
        self.close_button.clicked.connect(self.close)

        # Keep the layout in a local: assigning it to ``self.layout`` would
        # shadow the inherited ``QWidget.layout()`` method on this instance.
        # The installed layout stays reachable through ``self.layout()``.
        layout = QVBoxLayout()
        layout.addWidget(QLabel("Taobao Scraper"))
        layout.addWidget(self.start_button)
        layout.addWidget(self.collect_button)
        layout.addWidget(self.excel_button)
        layout.addWidget(self.close_button)
        self.setLayout(layout)

        self.playwright_thread = PlaywrightThread(self.db_manager)
        self.playwright_thread.data_collected.connect(self.on_data_collected)
        self.excel_exporter = ExcelExporter(self.db_manager)

    def _start_worker(self):
        """Start the worker thread unless it is already running.

        Returns:
            ``True`` if the thread was started, ``False`` if it was busy.
        """
        if self.playwright_thread.isRunning():
            logger.warning("Playwright 스레드가 이미 실행 중입니다")
            return False
        self.playwright_thread.start()
        return True

    @Slot()
    def start_scraping(self):
        """Handle the start button: launch the worker thread."""
        logger.info("Playwright 스레드 시작")
        self._start_worker()

    @Slot()
    def collect_data(self):
        """Handle the collect button: run a collection on the worker thread.

        Uses ``start()`` rather than calling ``run()`` directly; a direct
        ``run()`` call executes the whole scrape on the GUI thread and
        freezes the window until it finishes.
        """
        logger.info("수집 버튼 클릭됨 - 데이터 수집 시작")
        self._start_worker()

    @Slot()
    def export_to_excel(self):
        """Handle the export button and report the outcome in a dialog."""
        success = self.excel_exporter.export_to_excel()
        if success:
            QMessageBox.information(self, "엑셀 출력", "엑셀 파일로 저장 완료")
        else:
            QMessageBox.critical(self, "오류", "엑셀 저장 오류")

    @Slot(bool, str)
    def on_data_collected(self, success, message):
        """Show the worker's result message.

        Args:
            success: Whether the collection succeeded.
            message: Text emitted by the worker, shown verbatim.
        """
        if success:
            QMessageBox.information(self, "수집 완료", message)
        else:
            QMessageBox.warning(self, "수집 실패", message)
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
import asyncio
|
||||
import logging
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PlaywrightThread(QThread):
    """Worker thread that drives a Playwright browser and stores scraped items.

    Emits ``data_collected(success, message)`` once per run.
    """

    data_collected = Signal(bool, str)

    def __init__(self, db_manager):
        """Keep the database manager used to persist scraped rows."""
        super().__init__()
        self.db_manager = db_manager

    async def collect_data(self):
        """Open the site, scrape the items, store them and emit the result."""
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=False)
            try:
                # ``new_context`` has no ``stealth`` option; passing it raised
                # TypeError before any page was opened.
                context = await browser.new_context(viewport={"width": 1280, "height": 720})
                page = await context.new_page()
                await page.goto("https://a-site.com")
                logger.info("A 사이트에 접속했습니다.")
                await self.wait_for_user()

                items_data = await self.scrape_items(page)
                if items_data:
                    stored = self.db_manager.insert_items(items_data)
                    # Only an explicit ``False`` counts as a storage failure,
                    # so a manager whose insert returns ``None`` is still
                    # reported as success.
                    if stored is False:
                        self.data_collected.emit(False, "데이터 수집 실패")
                    else:
                        self.data_collected.emit(True, "데이터 수집 완료")
                else:
                    self.data_collected.emit(False, "데이터 수집 실패")
            finally:
                # Close the browser even when navigation or scraping raises.
                await browser.close()

    async def scrape_items(self, page):
        """Extract one row per ``.tb-pick-content-item`` element on ``page``.

        Returns:
            A list of ``(item_id, pc_url, name, price, image_url, sales)``
            tuples, or ``None`` if the item elements could not be queried.
            Items that fail to parse are logged and skipped, so one malformed
            entry no longer discards the whole batch.
        """
        try:
            items = await page.query_selector_all(".tb-pick-content-item")
        except Exception as e:
            logger.error(f"데이터 수집 오류: {e}")
            return None

        items_data = []
        for item in items:
            try:
                items_data.append(await self._extract_item(item))
            except Exception as e:
                logger.error(f"데이터 수집 오류: {e}")
        return items_data

    async def _extract_item(self, item):
        """Build the row tuple for one item element; raise if data is missing."""
        # Await first, then operate on the result: chaining ``.split`` or
        # ``.inner_text`` directly onto the call acts on the coroutine object.
        href = await item.get_attribute("href")
        if not href:
            raise ValueError("item element has no href attribute")
        item_id = href.split("itemIds=")[-1]
        pc_url = f"https://item.taobao.com/item.htm?id={item_id}"

        name = await self._inner_text(item, ".info-wrapper-title-text")
        price = await self._inner_text(item, ".price-value")
        sales = await self._inner_text(item, ".month-sale")

        image = await item.query_selector(".img-wrapper")
        style = await image.get_attribute("style") if image is not None else None
        image_url = self._image_url_from_style(style or "")

        return (item_id, pc_url, name, float(price), image_url, sales)

    @staticmethod
    async def _inner_text(item, selector):
        """Return the inner text of the first descendant matching ``selector``."""
        element = await item.query_selector(selector)
        if element is None:
            raise ValueError(f"missing element: {selector}")
        return await element.inner_text()

    @staticmethod
    def _image_url_from_style(style):
        """Return the target of the first ``url(...)`` in ``style``, or ``""``."""
        _, marker, rest = style.partition("url(")
        if not marker:
            return ""
        # Cut at the closing parenthesis so trailing CSS (";", other
        # declarations) is excluded, then drop optional quotes.
        return rest.partition(")")[0].strip(" \"'")

    async def wait_for_user(self):
        """Pause for two seconds before scraping starts."""
        await asyncio.sleep(2)

    def run(self):
        """Thread entry point: run ``collect_data`` in a fresh event loop.

        Any exception escaping the coroutine is logged and reported through
        ``data_collected`` so the listener always receives a result.
        """
        try:
            asyncio.run(self.collect_data())
        except Exception as e:
            logger.error(f"데이터 수집 오류: {e}")
            self.data_collected.emit(False, "데이터 수집 실패")
|
||||
Loading…
Reference in New Issue