가지치기 여부 추가

This commit is contained in:
EnvyPC 2024-04-15 21:10:03 +09:00
parent 3a0de892b2
commit 1eaa7e50bc
4 changed files with 144 additions and 32 deletions

View File

@ -25,7 +25,7 @@ logger = logging.getLogger('default_logger')
# return None, None # 조건에 맞는 항목을 찾지 못한 경우
def parse_naver_shopping(keyword_id, keyword, json_data, conn, overPrice, sortcount=5):
def parse_naver_shopping(keyword_id, keyword, isBranch, branchCount, json_data, conn, overPrice, sortcount=5):
# naver_code = code[0]
# base_cat_code = code[1]
@ -156,15 +156,44 @@ def parse_naver_shopping(keyword_id, keyword, json_data, conn, overPrice, sortco
# original relatedTags 리스트 가져오기
related_tags_ori = next_data_json["props"]["pageProps"]["relatedTags"]
# relatedTags 리스트 생성
related_tags = [tag.strip() for tag in related_tags_ori if tag]
# Build the related-tag lists. The relatedTags payload may be missing or empty,
# so normalise it to a list first; every slice below then operates on a list.
related_tags_ori = related_tags_ori or []
# Stripped copy that skips falsy entries.
related_tags = [tag.strip() for tag in related_tags_ori if tag]
logger.debug(f"현재 키워드인 [{keyword}]에 대한 연관검색어는 [{related_tags}] 입니다.")
# Number of keyword slots that follow the main keyword.
max_slots = 5
# branchCount may arrive as text (or as None), so coerce it to an int and
# fall back to 0 for anything that cannot be converted.
try:
    branchCount = int(branchCount)
except (TypeError, ValueError):
    logger.error(f"branchCount 값이 정수로 변환되지 않습니다: {branchCount}")
    branchCount = 0  # default
# Clamp to 0..max_slots: a negative count would slice from the end and over-pad,
# and a count above max_slots would skip both the padding and the length limit.
branchCount = max(0, min(branchCount, max_slots))
if not isBranch:
    # Branching disabled: blank out every related-keyword slot.
    logger.debug(f"가지치기 설정이 [{isBranch}] 이므로 연관검색어 초기화")
    related_tags = [""] * max_slots
    related_tags_ori = [""] * max_slots
else:
    logger.debug(f"가지치기 설정이 [{isBranch}] 이므로 {branchCount}개 연관검색어 보존")
    if branchCount < len(related_tags):
        # Keep the first branchCount tags and fill the remaining slots with blanks.
        padding = [""] * (max_slots - branchCount)
        related_tags = related_tags[:branchCount] + padding
        related_tags_ori = related_tags_ori[:branchCount] + padding
    else:
        # Fewer tags than requested: keep them all, capped at max_slots (no padding).
        related_tags = related_tags[:max_slots]
        related_tags_ori = related_tags_ori[:max_slots]
# DB에 기록하기
for index, product in enumerate(final_top_5_products):
# 첫 번째 제품에 대한 keyword 처리
logger.debug(f"{keyword}의 검색결과 [{index}]번째 상품 처리")
current_keyword = keyword if index == 0 else related_tags_ori[index - 1] if index - 1 < len(related_tags_ori) else keyword
logger.debug(f"{keyword}의 검색결과 [{index+1}]번째 상품 처리")
set_keyword = keyword if index == 0 else related_tags_ori[index - 1] if index - 1 < len(related_tags_ori) else keyword
current_keyword = set_keyword
logger.debug(f"가지치기 상태 : [{isBranch}] 이므로 {index+1}번째 [current_keyword]에는 [{set_keyword}] 할당 ")
# current_keyword = keyword
price = product.get("item", {}).get("price")
productTitle = product.get("item", {}).get("productTitle")
@ -206,7 +235,7 @@ def parse_naver_shopping(keyword_id, keyword, json_data, conn, overPrice, sortco
""", (category1Name, category2Name, category3Name, category4Name or None))
cat_code_result = c.fetchone()
cat_code = cat_code_result[0] if cat_code_result else ""
logger.debug(f"[{index}/{len(final_top_5_products)}]번째 상품 cat_code 8자리 숫자 : {cat_code}")
logger.debug(f"[{index+1}/{len(final_top_5_products)}]번째 상품 cat_code 8자리 숫자 : {cat_code}")
# 날짜와 시간을 문자열로 변환하기 (예: '2023-12-19', '14:30:00')
date_created = now.strftime('%Y-%m-%d')

View File

@ -54,7 +54,7 @@
<rect>
<x>130</x>
<y>20</y>
<width>51</width>
<width>61</width>
<height>31</height>
</rect>
</property>
@ -74,7 +74,7 @@
<widget class="QLabel" name="selkeyword_2">
<property name="geometry">
<rect>
<x>80</x>
<x>70</x>
<y>20</y>
<width>64</width>
<height>31</height>
@ -87,8 +87,8 @@
<widget class="QLabel" name="selkeyword_3">
<property name="geometry">
<rect>
<x>80</x>
<y>74</y>
<x>70</x>
<y>70</y>
<width>64</width>
<height>21</height>
</rect>
@ -100,7 +100,7 @@
<widget class="QLineEdit" name="sortcount">
<property name="geometry">
<rect>
<x>150</x>
<x>140</x>
<y>70</y>
<width>31</width>
<height>31</height>
@ -160,7 +160,7 @@ XLS저장</string>
<widget class="QLabel" name="matchCount">
<property name="geometry">
<rect>
<x>190</x>
<x>200</x>
<y>20</y>
<width>64</width>
<height>31</height>
@ -173,7 +173,7 @@ XLS저장</string>
<widget class="QLabel" name="matchCountbox">
<property name="geometry">
<rect>
<x>250</x>
<x>260</x>
<y>20</y>
<width>64</width>
<height>31</height>
@ -2204,7 +2204,7 @@ background-color: rgb(170, 255, 255);
<attribute name="title">
<string>모바일탭-텍스트검색</string>
</attribute>
<widget class="QWebEngineView" name="webEngineView">
<widget class="QWebEngineView" name="webEngineView" native="true">
<property name="geometry">
<rect>
<x>0</x>
@ -2213,12 +2213,12 @@ background-color: rgb(170, 255, 255);
<height>731</height>
</rect>
</property>
<property name="url">
<property name="url" stdset="0">
<url>
<string>https://m.intl.taobao.com/</string>
</url>
</property>
<property name="zoomFactor">
<property name="zoomFactor" stdset="0">
<double>0.500000000000000</double>
</property>
</widget>
@ -2227,7 +2227,7 @@ background-color: rgb(170, 255, 255);
<attribute name="title">
<string>PC탭-이미지검색</string>
</attribute>
<widget class="QWebEngineView" name="webEngineView_2">
<widget class="QWebEngineView" name="webEngineView_2" native="true">
<property name="geometry">
<rect>
<x>10</x>
@ -2236,12 +2236,12 @@ background-color: rgb(170, 255, 255);
<height>731</height>
</rect>
</property>
<property name="url">
<property name="url" stdset="0">
<url>
<string>https://world.taobao.com/</string>
</url>
</property>
<property name="zoomFactor">
<property name="zoomFactor" stdset="0">
<double>0.400000000000000</double>
</property>
</widget>
@ -2695,7 +2695,7 @@ font: 75 16pt &quot;Arial&quot;;
<property name="geometry">
<rect>
<x>30</x>
<y>30</y>
<y>10</y>
<width>61</width>
<height>31</height>
</rect>
@ -2720,7 +2720,7 @@ font: 75 16pt &quot;Arial&quot;;
<property name="geometry">
<rect>
<x>330</x>
<y>30</y>
<y>10</y>
<width>51</width>
<height>31</height>
</rect>
@ -2745,7 +2745,7 @@ font: 75 16pt &quot;Arial&quot;;
<property name="geometry">
<rect>
<x>220</x>
<y>30</y>
<y>10</y>
<width>61</width>
<height>31</height>
</rect>
@ -2770,7 +2770,7 @@ font: 75 16pt &quot;Arial&quot;;
<property name="geometry">
<rect>
<x>110</x>
<y>30</y>
<y>10</y>
<width>31</width>
<height>31</height>
</rect>
@ -2786,7 +2786,7 @@ font: 75 16pt &quot;Arial&quot;;
<property name="geometry">
<rect>
<x>170</x>
<y>30</y>
<y>10</y>
<width>31</width>
<height>31</height>
</rect>
@ -2802,7 +2802,7 @@ font: 75 16pt &quot;Arial&quot;;
<property name="geometry">
<rect>
<x>140</x>
<y>30</y>
<y>10</y>
<width>31</width>
<height>31</height>
</rect>
@ -2815,7 +2815,7 @@ font: 75 16pt &quot;Arial&quot;;
<property name="geometry">
<rect>
<x>200</x>
<y>30</y>
<y>10</y>
<width>31</width>
<height>31</height>
</rect>
@ -2940,7 +2940,7 @@ font: 75 12pt &quot;Cafe24 Ssurround Bold&quot;</string>
<property name="geometry">
<rect>
<x>20</x>
<y>70</y>
<y>60</y>
<width>51</width>
<height>41</height>
</rect>
@ -2976,7 +2976,7 @@ font: 75 12pt &quot;Cafe24 Ssurround Bold&quot;</string>
<property name="geometry">
<rect>
<x>70</x>
<y>70</y>
<y>60</y>
<width>51</width>
<height>41</height>
</rect>
@ -2990,7 +2990,7 @@ font: 75 12pt &quot;Cafe24 Ssurround Bold&quot;</string>
<property name="geometry">
<rect>
<x>120</x>
<y>70</y>
<y>60</y>
<width>51</width>
<height>41</height>
</rect>
@ -3027,6 +3027,67 @@ font: 75 12pt &quot;Cafe24 Ssurround Bold&quot;</string>
스크래핑</string>
</property>
</widget>
<widget class="QGroupBox" name="groupBox_7">
<property name="geometry">
<rect>
<x>170</x>
<y>60</y>
<width>201</width>
<height>51</height>
</rect>
</property>
<property name="title">
<string>가지치기 여부</string>
</property>
<widget class="QCheckBox" name="branch_checkBox">
<property name="geometry">
<rect>
<x>10</x>
<y>20</y>
<width>81</width>
<height>21</height>
</rect>
</property>
<property name="text">
<string> 허용</string>
</property>
</widget>
<widget class="QLineEdit" name="branchCount">
<property name="geometry">
<rect>
<x>90</x>
<y>20</y>
<width>21</width>
<height>21</height>
</rect>
</property>
<property name="text">
<string>1</string>
</property>
<property name="echoMode">
<enum>QLineEdit::Normal</enum>
</property>
<property name="cursorPosition">
<number>1</number>
</property>
<property name="alignment">
<set>Qt::AlignCenter</set>
</property>
</widget>
<widget class="QLabel" name="matchCount_5">
<property name="geometry">
<rect>
<x>120</x>
<y>20</y>
<width>41</width>
<height>21</height>
</rect>
</property>
<property name="text">
<string>갯수</string>
</property>
</widget>
</widget>
</widget>
<customwidgets>
<customwidget>

View File

@ -11,12 +11,14 @@ logger = logging.getLogger('default_logger')
class ScrapingThread(QThread):
progress_updated = pyqtSignal(int, int)
def __init__(self, conn, keywords, json_data, overPrice, sortcount, parent=None):
def __init__(self, conn, keywords, isBranch, branchCount, json_data, overPrice, sortcount, parent=None):
super(ScrapingThread, self).__init__(parent)
self.conn = conn
self.keywords = keywords
self.overPrice = overPrice
self.sortcount = sortcount
self.isBranch = isBranch
self.branchCount = branchCount
# naver_code가 아닌 naver_codes로 변수명 변경하여 리스트 처리를 명시
self.json_data = json_data
# self.codes = codes
@ -36,7 +38,7 @@ class ScrapingThread(QThread):
# except IndexError:
# logging.error(f"IndexError: 'naver_codes' 리스트의 길이가 충분하지 않습니다. idx={idx}")
# naver_code = "" # 기본 naver_code 값을 설정하거나 오류 처리
parse_naver_shopping(idx, keyword, self.json_data, self.conn, self.overPrice, self.sortcount)
parse_naver_shopping(idx, keyword, self.isBranch, self.branchCount, self.json_data, self.conn, self.overPrice, self.sortcount)
current_progress = idx + 1
self.progress_updated.emit(total_items, current_progress)
sleep_time_for_scraping = randint(1, 4)

View File

@ -298,7 +298,14 @@ class Ui_Dialog(QtWidgets.QDialog):
# Qt Designer에서 생성한 UI 클래스 내에서 버튼 클릭 이벤트 연결
self.papagoApi.clicked.connect(self.manage_papago_api_keys)
self.branch_checkBox = self.findChild(QtWidgets.QCheckBox, 'branch_checkBox')
self.branchCount = self.findChild(QtWidgets.QLineEdit, 'branchCount')
self.branchCount.setEnabled(False)
# 체크박스 상태 변화에 따라 슬롯 연결
self.branch_checkBox.stateChanged.connect(self.updatebranchCountState)
# 프로그램 초기화 부분에 API 키 관리 함수 호출
# self.manage_papago_api_keys()
@ -508,6 +515,13 @@ class Ui_Dialog(QtWidgets.QDialog):
# logger.debug(f"on_tao_image_save_btn_clicked 실행 중 예외 발생: {e}")
# # 여기서 필요한 경우 추가적인 예외 처리 코드를 작성할 수 있습니다.
def updatebranchCountState(self):
    """Enable the branchCount line edit only while branch_checkBox is checked.

    The line edit's text is left untouched when the box is unchecked
    (clearing it is intentionally not done here).
    """
    # isChecked() already yields the boolean that setEnabled() expects,
    # so the state can be forwarded directly instead of branching on it.
    self.branchCount.setEnabled(self.branch_checkBox.isChecked())
# img_search 함수 구현
def img_search(self):
# 웹 페이지 로드
@ -2159,6 +2173,12 @@ class Ui_Dialog(QtWidgets.QDialog):
def on_ns_scraping_clicked(self):
try:
isBranch = self.branch_checkBox.isChecked()
if isBranch:
branchCount = self.branchCount.text()
else:
branchCount = 0
logger.debug(f"가지치기 설정 : {isBranch}")
#conn = sqlite3.connect(self.db_name)
self.conndb = sqlite3.connect(self.db_name, check_same_thread=False)
logger.debug(f"선택된 DB: {self.db_name}")
@ -2181,7 +2201,7 @@ class Ui_Dialog(QtWidgets.QDialog):
# logger.debug(f"naver_code : {naver_code}")
QMessageBox.information(self, "알림", "네이버쇼핑 스크래핑 시작!")
self.scraping_thread = ScrapingThread(self.conndb, keywords, self.json_data, self.overPrice.text(), self.sortcount.text())
self.scraping_thread = ScrapingThread(self.conndb, keywords, isBranch, branchCount, self.json_data, self.overPrice.text(), self.sortcount.text())
self.scraping_thread.progress_updated.connect(self.update_progress_bar)
# self.scraping_thread.finished.connect(self.on_scraping_finished) # 스크래핑 완료 후 처리
self.scraping_thread.finished.connect(self.start_image_save_thread) # 스크래핑 완료 후 이미지 저장 스레드 시작