InitialCommit

This commit is contained in:
R5600U_PC 2024-10-31 16:28:46 +09:00
commit fccf48eb92
6 changed files with 205 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
Lib/
Include/
Scripts/
pyvenv.cfg
*.log

17
main.py Normal file
View File

@ -0,0 +1,17 @@
import sys
import logging
from PySide6.QtWidgets import QApplication
from src.gui import TaobaoScraperApp
from src.databaseManager import DatabaseManager
# Configure logging once at start-up: DEBUG level and a timestamped
# "time - level - message" record format.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def main():
    """Create the Qt application, show the main window and run the event loop.

    Returns:
        The value returned by ``QApplication.exec()``.
    """
    app = QApplication(sys.argv)
    db_manager = DatabaseManager()  # create the database manager instance
    try:
        window = TaobaoScraperApp(db_manager)
        window.show()
        return app.exec()
    finally:
        # Release the SQLite connection even when window construction or the
        # event loop raises; the original script never closed it.
        db_manager.close()


if __name__ == "__main__":
    sys.exit(main())

50
src/databaseManager.py Normal file
View File

@ -0,0 +1,50 @@
import sqlite3
import logging
logger = logging.getLogger(__name__)


# SQLite database initialisation and item data management
class DatabaseManager:
    """Owns one SQLite connection and stores scraped items in an ``items`` table.

    The connection is opened with ``check_same_thread=False`` and every
    database operation is serialised with a lock, because the manager is
    created on one thread and its methods are called from a worker thread.
    With sqlite3's default setting such calls raise ``ProgrammingError``.

    All methods log ``sqlite3.Error`` instead of raising it.
    """

    def __init__(self, db_name="taobao_items.db"):
        """Open (or create) the database and make sure the table exists.

        Args:
            db_name: Path of the SQLite file; ``":memory:"`` also works.
        """
        import threading  # local import: nothing else in this module needs it

        self.conn = sqlite3.connect(db_name, check_same_thread=False)
        # Kept for backward compatibility; the methods below use
        # per-statement cursors created through the connection instead.
        self.cur = self.conn.cursor()
        # Serialises every use of the shared connection across threads.
        self._lock = threading.Lock()
        self.create_table()

    def create_table(self):
        """Create the ``items`` table if it does not exist yet."""
        try:
            # ``with self.conn`` commits on success and rolls back on error.
            with self._lock, self.conn:
                self.conn.execute("""
                    CREATE TABLE IF NOT EXISTS items (
                        item_id TEXT PRIMARY KEY,
                        pc_url TEXT,
                        name TEXT,
                        price REAL,
                        image_url TEXT,
                        sales TEXT
                    )
                """)
            logger.info("데이터베이스 테이블 초기화 완료")
        except sqlite3.Error as e:
            logger.error("DB 테이블 생성 오류: %s", e)

    def insert_items(self, items_data):
        """Insert or replace a batch of items in a single transaction.

        Args:
            items_data: Iterable of 6-tuples
                ``(item_id, pc_url, name, price, image_url, sales)``.
                Rows whose ``item_id`` already exists are replaced.
        """
        try:
            # The connection context manager rolls the whole batch back when
            # any row fails, so a failed insert leaves no open transaction.
            with self._lock, self.conn:
                self.conn.executemany("""
                    INSERT OR REPLACE INTO items (item_id, pc_url, name, price, image_url, sales)
                    VALUES (?, ?, ?, ?, ?, ?)
                """, items_data)
            logger.info("데이터 삽입 완료")
        except sqlite3.Error as e:
            logger.error("DB 삽입 오류: %s", e)

    def fetch_all_items(self):
        """Return every row of the ``items`` table.

        Returns:
            A list of row tuples, or an empty list when the query fails.
        """
        try:
            with self._lock:
                return self.conn.execute("SELECT * FROM items").fetchall()
        except sqlite3.Error as e:
            logger.error("DB 조회 오류: %s", e)
            return []

    def close(self):
        """Close the underlying connection."""
        with self._lock:
            self.conn.close()

19
src/excel_export.py Normal file
View File

@ -0,0 +1,19 @@
import logging
import pandas as pd
logger = logging.getLogger(__name__)


class ExcelExporter:
    """Writes the rows returned by a database manager to an Excel workbook."""

    def __init__(self, db_manager):
        """Remember the object whose ``fetch_all_items()`` supplies the rows."""
        self.db_manager = db_manager

    def export_to_excel(self, file_name="taobao_items.xlsx"):
        """Dump every stored item to ``file_name``.

        Args:
            file_name: Destination path handed to ``DataFrame.to_excel``.

        Returns:
            ``True`` when the workbook was written, ``False`` when fetching,
            building the frame, or writing raised any exception (the error
            is logged).
        """
        column_names = ["item_id", "pc_url", "name", "price", "image_url", "sales"]
        try:
            rows = self.db_manager.fetch_all_items()
            frame = pd.DataFrame(rows, columns=column_names)
            frame.to_excel(file_name, index=False)
            logger.info("엑셀 파일로 저장 완료")
        except Exception as exc:
            logger.error(f"엑셀 저장 오류: {exc}")
            return False
        return True

60
src/gui.py Normal file
View File

@ -0,0 +1,60 @@
from PySide6.QtWidgets import QWidget, QVBoxLayout, QPushButton, QLabel, QMessageBox
from PySide6.QtCore import Slot
import logging
from playwright_thread import PlaywrightThread
from excel_export import ExcelExporter
logger = logging.getLogger(__name__)


class TaobaoScraperApp(QWidget):
    """Main window with start, collect, Excel-export and close buttons.

    Scraping is delegated to a ``PlaywrightThread`` worker, which reports its
    result through the ``data_collected`` signal. Exporting is delegated to
    an ``ExcelExporter``.
    """

    def __init__(self, db_manager):
        """Build the widgets and wire them to their slots.

        Args:
            db_manager: Database manager shared with the worker thread and
                the Excel exporter.
        """
        super().__init__()
        self.db_manager = db_manager
        self.setWindowTitle("Taobao Scraper")

        self.start_button = QPushButton("시작")
        self.start_button.clicked.connect(self.start_scraping)
        self.collect_button = QPushButton("수집")
        self.collect_button.clicked.connect(self.collect_data)
        self.excel_button = QPushButton("엑셀출력")
        self.excel_button.clicked.connect(self.export_to_excel)
        self.close_button = QPushButton("닫기")
        self.close_button.clicked.connect(self.close)

        # Stored as ``main_layout``: assigning to ``self.layout`` would shadow
        # the inherited QWidget.layout() method on this instance.
        self.main_layout = QVBoxLayout()
        self.main_layout.addWidget(QLabel("Taobao Scraper"))
        for button in (
            self.start_button,
            self.collect_button,
            self.excel_button,
            self.close_button,
        ):
            self.main_layout.addWidget(button)
        self.setLayout(self.main_layout)

        self.playwright_thread = PlaywrightThread(self.db_manager)
        self.playwright_thread.data_collected.connect(self.on_data_collected)
        self.excel_exporter = ExcelExporter(self.db_manager)

    @Slot()
    def start_scraping(self):
        """Start the Playwright worker thread."""
        logger.info("Playwright 스레드 시작")
        self.playwright_thread.start()

    @Slot()
    def collect_data(self):
        """Run a collection pass on the worker thread.

        The worker is launched with ``start()``. Calling ``run()`` directly,
        as the code did before, executes the whole scrape on the GUI thread
        and freezes the window until it finishes.
        """
        logger.info("수집 버튼 클릭됨 - 데이터 수집 시작")
        if self.playwright_thread.isRunning():
            # A pass is already in flight; its result will arrive through
            # the data_collected signal.
            logger.warning("수집이 이미 진행 중입니다")
            return
        self.playwright_thread.start()

    @Slot()
    def export_to_excel(self):
        """Export the stored items and report the outcome in a message box."""
        success = self.excel_exporter.export_to_excel()
        if success:
            QMessageBox.information(self, "엑셀 출력", "엑셀 파일로 저장 완료")
        else:
            QMessageBox.critical(self, "오류", "엑셀 저장 오류")

    @Slot(bool, str)
    def on_data_collected(self, success, message):
        """Show the worker's result.

        Args:
            success: Whether the collection pass succeeded.
            message: Text to display in the message box.
        """
        if success:
            QMessageBox.information(self, "수집 완료", message)
        else:
            QMessageBox.warning(self, "수집 실패", message)

54
src/playwright_thread.py Normal file
View File

@ -0,0 +1,54 @@
import asyncio
import logging
from PySide6.QtCore import QThread, Signal
from playwright.async_api import async_playwright
logger = logging.getLogger(__name__)


class PlaywrightThread(QThread):
    """Worker thread that opens a browser, scrapes items and stores them.

    The outcome of each run is reported through ``data_collected``.
    """

    # Emitted with (success flag, message to show the user).
    data_collected = Signal(bool, str)

    def __init__(self, db_manager):
        """Keep the database manager used to persist scraped rows.

        Args:
            db_manager: Object providing ``insert_items(rows)``.
                NOTE(review): it is called from this worker thread, so the
                underlying connection must allow cross-thread use. Verify.
        """
        super().__init__()
        self.db_manager = db_manager

    async def collect_data(self):
        """Open the site, wait, scrape, store the rows and emit the result."""
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=False)
            try:
                # ``stealth=True`` was removed: new_context() does not accept
                # that keyword and raised TypeError before any page opened.
                context = await browser.new_context(
                    viewport={"width": 1280, "height": 720}
                )
                page = await context.new_page()
                await page.goto("https://a-site.com")
                logger.info("A 사이트에 접속했습니다.")
                await self.wait_for_user()
                items_data = await self.scrape_items(page)
                if items_data:
                    self.db_manager.insert_items(items_data)
                    self.data_collected.emit(True, "데이터 수집 완료")
                else:
                    self.data_collected.emit(False, "데이터 수집 실패")
            finally:
                # Close the browser even when navigation or scraping raises.
                await browser.close()

    async def scrape_items(self, page):
        """Extract one row per ``.tb-pick-content-item`` element on ``page``.

        Items that cannot be parsed are logged and skipped so that one bad
        element does not discard the rest.

        Returns:
            A list of ``(item_id, pc_url, name, price, image_url, sales)``
            tuples (possibly empty), or ``None`` when the item elements
            could not be queried at all.
        """
        try:
            items = await page.query_selector_all(".tb-pick-content-item")
        except Exception as e:
            logger.error(f"데이터 수집 오류: {e}")
            return None

        items_data = []
        for item in items:
            try:
                items_data.append(await self._extract_item(item))
            except Exception as e:
                logger.error(f"데이터 수집 오류: {e}")
        return items_data

    async def _extract_item(self, item):
        """Build the row tuple for a single item element.

        Each Playwright call is awaited before its result is used. The
        original chained ``.split()`` / ``.inner_text()`` onto the un-awaited
        coroutine, which raised AttributeError for every item.

        Raises:
            ValueError: The link has no ``itemIds=`` value, or the price
                text is not a number.
            LookupError: A required child element is missing.
        """
        href = await item.get_attribute("href") or ""
        if "itemIds=" not in href:
            raise ValueError(f"item link has no itemIds parameter: {href!r}")
        # Keep only the parameter value, dropping any following "&..." part.
        item_id = href.split("itemIds=")[-1].split("&")[0]
        if not item_id:
            raise ValueError(f"item link has an empty itemIds value: {href!r}")
        pc_url = f"https://item.taobao.com/item.htm?id={item_id}"

        name = await self._inner_text(item, ".info-wrapper-title-text")
        price_text = await self._inner_text(item, ".price-value")
        image_node = await item.query_selector(".img-wrapper")
        style = await image_node.get_attribute("style") if image_node else None
        image_url = self._image_url_from_style(style or "")
        sales = await self._inner_text(item, ".month-sale")
        return (item_id, pc_url, name, float(price_text), image_url, sales)

    @staticmethod
    async def _inner_text(item, selector):
        """Return the inner text of the first child of ``item`` matching ``selector``."""
        node = await item.query_selector(selector)
        if node is None:
            raise LookupError(f"element not found: {selector}")
        return await node.inner_text()

    @staticmethod
    def _image_url_from_style(style):
        """Return the URL inside the last ``url(...)`` of a CSS style string.

        Surrounding quotes are removed; an empty string is returned when the
        style contains no ``url(``.
        """
        if "url(" not in style:
            return ""
        inner = style.split("url(")[-1].split(")")[0]
        return inner.strip("\"' ")

    async def wait_for_user(self):
        """Pause for two seconds before scraping starts."""
        await asyncio.sleep(2)

    def run(self):
        """Thread entry point: run one collection pass on a fresh event loop."""
        try:
            asyncio.run(self.collect_data())
        except Exception as e:
            # Without this the thread would end with no signal and the GUI
            # would never learn that the pass failed.
            logger.exception("데이터 수집 오류: %s", e)
            self.data_collected.emit(False, "데이터 수집 실패")