썸네일 적용완료
This commit is contained in:
parent
2f94b6e02b
commit
9f8b65b307
|
|
@ -13,14 +13,10 @@ from io import BytesIO
|
|||
|
||||
from whale_new import WhaleTranslator
|
||||
from clipboardImageManager import ClipboardImageManager
|
||||
from vertexAI import VertexAITranslator
|
||||
from option import OptionHandler
|
||||
from price import PriceHandler
|
||||
from title import TitleHandler
|
||||
from locatorManager import LocatorManager
|
||||
from src.cmb_diag import CMBSettingsDialog
|
||||
from src.DatabaseManager import DatabaseManager
|
||||
|
||||
from thumb import ThumbnailHandler
|
||||
class BrowserController(QThread):
|
||||
# 브라우저 시작 시그널 정의
|
||||
browser_started = Signal()
|
||||
|
|
@ -58,7 +54,7 @@ class BrowserController(QThread):
|
|||
self.optionHandler = OptionHandler(self.locator_manager, self, self.whale_translator, self.clipboardImageManager, self.logger, self.vertexAI, debug_flag=self.toggle_states['debug_mode'])
|
||||
self.priceHandler = PriceHandler(self.locator_manager, self, self.logger, self.optionHandler, self.vertexAI, self.cmb_diag, debug_flag=self.toggle_states['debug_mode'])
|
||||
self.titleHandler = TitleHandler(self.locator_manager, self, self.logger)
|
||||
|
||||
self.thumbnailHandler = ThumbnailHandler(self.locator_manager, self, self.logger, self.whale_translator, self.clipboardImageManager, self.toggle_states)
|
||||
|
||||
# BrowserController에 해당하는 모든 locator를 정의
|
||||
self.chrome_window_name = self.locator_manager.get_locator('BrowserControl', 'chrome_window_name')
|
||||
|
|
@ -117,8 +113,12 @@ class BrowserController(QThread):
|
|||
# WhaleTranslator 필요 여부 확인 및 초기화
|
||||
optionIMGTrans_status = self.toggle_states.get('optionIMGTrans', False)
|
||||
detail_IMGTrans_status = self.toggle_states.get('detail_IMGTrans', False)
|
||||
if optionIMGTrans_status or detail_IMGTrans_status:
|
||||
self.logger.debug(f"optionIMGTrans_status: {optionIMGTrans_status}, detail_IMGTrans_status: {detail_IMGTrans_status}")
|
||||
thumb_status = self.toggle_states.get('thumb', False)
|
||||
|
||||
self.logger.debug(f"optionIMGTrans_status: {optionIMGTrans_status}, detail_IMGTrans_status: {detail_IMGTrans_status}, thumb_status: {thumb_status}")
|
||||
|
||||
if optionIMGTrans_status or detail_IMGTrans_status or thumb_status:
|
||||
self.logger.debug('이미지번역을 위해 웨일을 로드합니다...')
|
||||
self.whale_translator = WhaleTranslator(self.logger)
|
||||
self.whale_translator.start_whale_browser()
|
||||
|
||||
|
|
@ -206,6 +206,14 @@ class BrowserController(QThread):
|
|||
self.logger.info('신규 상품 등록 페이지로 이동 중...')
|
||||
await self.go_to_new_product_page()
|
||||
|
||||
|
||||
# 각 핸들러에 초기화된 page 객체 전달.
|
||||
self.optionHandler.update_page(self.page)
|
||||
self.titleHandler.update_page(self.page)
|
||||
self.priceHandler.update_page(self.page)
|
||||
self.thumbnailHandler.update_page(self.page)
|
||||
|
||||
|
||||
self.browser_started.emit() # 브라우저 시작 신호
|
||||
except Exception as e:
|
||||
self.logger.error(f"브라우저 시작 오류: {str(e)}", exc_info=True)
|
||||
|
|
@ -544,6 +552,14 @@ class BrowserController(QThread):
|
|||
except Exception as e:
|
||||
self.logger.error(f"가격 탭 클릭 중 오류: {e}", exc_info=True)
|
||||
|
||||
async def click_thumb_tab(self):
    """Click the thumbnail tab.

    A failure of the click is logged with its traceback and swallowed, so
    this coroutine never raises to its caller.
    """
    try:
        thumb_tab = self.thumb_tab_locator
        await self.page.click(thumb_tab)
        self.logger.info("썸네일 탭 클릭 완료.")
    except Exception as click_error:
        self.logger.error(f"썸네일 탭 클릭 중 오류: {click_error}", exc_info=True)
|
||||
|
||||
async def click_title_tab(self):
|
||||
"""상품명 탭 클릭"""
|
||||
try:
|
||||
|
|
@ -1053,7 +1069,7 @@ class BrowserController(QThread):
|
|||
await self.scroll_page_to_top()
|
||||
self.logger.debug(f'1페이지가 아니므로 상품의 동적로딩을 위해 휠 스크롤 업')
|
||||
|
||||
is_ed_mode = self.toggle_states.get['ed_mode']
|
||||
is_ed_mode = self.toggle_states.get('ed_mode', False)
|
||||
if not is_ed_mode:
|
||||
# 4. 현재 페이지의 모든 "세부사항 수정 및 업로드" 버튼 찾기
|
||||
self.logger.debug('수정모드가 아니므로 상품수정 버튼 elements를 수집합니다.')
|
||||
|
|
@ -1071,7 +1087,7 @@ class BrowserController(QThread):
|
|||
self.logger.debug('수정할 상품이 없습니다. 작업을 종료합니다.')
|
||||
break
|
||||
|
||||
is_recovery_mode = self.toggle_states.get['recovery_mode']
|
||||
is_recovery_mode = self.toggle_states.get('recovery_mode', False)
|
||||
if is_recovery_mode:
|
||||
deleted_imgs = self.deleted_img_urls_from_logs()
|
||||
|
||||
|
|
@ -1096,34 +1112,38 @@ class BrowserController(QThread):
|
|||
product_category = await self.titleHandler.get_category(market='ss') # 카테고리 가져오기
|
||||
|
||||
# 옵션 수정
|
||||
is_optionTrnas = self.toggle_states.get['optionTrnas']
|
||||
|
||||
if self.toggle_states['optionTrnas'] or self.toggle_states['optionIMGTrans'] or self.toggle_states['optionAutoSelect']:
|
||||
self.logger.debug(f"옵션수정 : optionTrnas={self.toggle_states['optionTrnas']} + optionIMGTrans={self.toggle_states['optionIMGTrans']} + optionAutoSelect{self.toggle_states['optionAutoSelect']}")
|
||||
is_optionTrnas = self.toggle_states.get('optionTrnas', False)
|
||||
is_optionIMGTrans = self.toggle_states.get('optionIMGTrans', False)
|
||||
is_optionAutoSelect = self.toggle_states.get('optionAutoSelect', False)
|
||||
|
||||
if is_optionTrnas or is_optionIMGTrans or is_optionAutoSelect:
|
||||
self.logger.debug(f"옵션수정 : optionTrnas={is_optionTrnas} + optionIMGTrans={is_optionIMGTrans} + optionAutoSelect{is_optionAutoSelect}")
|
||||
await self.edit_option(product_name)
|
||||
|
||||
# 가격 수정
|
||||
if self.toggle_states['price']:
|
||||
self.logger.debug(f"가격수정 : {self.toggle_states['price']} ")
|
||||
is_price = self.toggle_states.get('price', False)
|
||||
if is_price:
|
||||
self.logger.debug(f"가격수정 : {is_price} ")
|
||||
await self.edit_price(product_category)
|
||||
|
||||
# 썸네일 수정
|
||||
if self.toggle_states['thumb']:
|
||||
pass
|
||||
if self.toggle_states.get('thumb',False):
|
||||
self.logger.debug(f"썸네일수정 : {self.toggle_states['thumb']} ")
|
||||
await self.edit_thumb()
|
||||
|
||||
# 태그 수정
|
||||
if self.toggle_states['tag']:
|
||||
if self.toggle_states.get('tag',False):
|
||||
pass
|
||||
|
||||
# 상품명 수정
|
||||
if self.toggle_states['title']:
|
||||
if self.toggle_states.get('title',False):
|
||||
pass
|
||||
|
||||
# 상세페이지 수정
|
||||
if self.toggle_states['detail_Option'] or self.toggle_states['detail_IMGTrans']:
|
||||
if self.toggle_states.get('detail_Option',False) or self.toggle_states.get('detail_IMGTrans',False):
|
||||
self.logger.debug(f"상세페이지 수정 : {self.toggle_states['detail_Option']} + {self.toggle_states['detail_IMGTrans']}")
|
||||
|
||||
if not self.toggle_states['recovery_mode']:
|
||||
if not self.toggle_states.get('recovery_mode',False):
|
||||
await self.detail_trans()
|
||||
else:
|
||||
await self.detail_trans_for_recovery(product_name, deleted_imgs)
|
||||
|
|
@ -1221,6 +1241,17 @@ class BrowserController(QThread):
|
|||
await self.save_product_edit()
|
||||
|
||||
|
||||
async def edit_thumb(self):
    """Run the thumbnail edit flow for the product currently being edited.

    The three steps run strictly in order: open the thumbnail tab, let
    ``self.thumbnailHandler`` process the thumbnails, then save the edit.
    Exceptions raised by the handler or by the save step propagate to the
    caller.
    """
    # Open the thumbnail tab.
    await self.click_thumb_tab()

    # Thumbnail processing is delegated to the thumbnail handler.
    await self.thumbnailHandler.process_thumbnails()

    # Save after editing.
    await self.save_product_edit()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1301,7 +1332,6 @@ class BrowserController(QThread):
|
|||
def cleanup(self):
    """Cleanup hook; currently a no-op."""
    return None
|
||||
|
||||
|
||||
def check_pause(self):
|
||||
"""일시 정지 상태라면 재개될 때까지 대기"""
|
||||
self.pause_mutex.lock()
|
||||
|
|
|
|||
|
|
@ -1,976 +0,0 @@
|
|||
from playwright.async_api import async_playwright, TimeoutError
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
import re
|
||||
import pyautogui
|
||||
import time
|
||||
import win32gui, win32con
|
||||
from bs4 import BeautifulSoup
|
||||
import asyncio
|
||||
import os, sys, random
|
||||
import requests
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
class BrowserController(QThread):
|
||||
data_collected = Signal(bool, str)
|
||||
|
||||
def __init__(self, app, logger, locator_manager, login_infos, toggle_states):
    """Store collaborators and resolve every 'BrowserControl' locator.

    Args:
        app: Accepted for interface compatibility; not used in this method.
        logger: Logger used by all methods of the controller.
        locator_manager: Provides ``get_locator(section, key)`` and a
            ``selectors`` mapping.
        login_infos: Mapping holding the login credentials.
        toggle_states: Mapping of feature toggles.
    """
    super().__init__()
    self.logger = logger
    # Current log file followed by its five rotated backups.
    self.log_files = ["appTranslator.log"] + [f"appTranslator.log.{idx}" for idx in range(1, 6)]
    self.locator_manager = locator_manager
    self.toggle_states = toggle_states
    self.login_infos = login_infos

    # Native window handles and browser objects are filled in later.
    self.chrome_hwnd = None
    self.whale_hwnd = None
    self.whale_browser = None  # set a whale_browser object here if needed
    self.playwright = None
    self.browser = None
    self.page = None

    # Every locator attribute has the same name as its key in the
    # 'BrowserControl' section, so they are resolved in one pass
    # (same lookup order as before).
    locator_keys = (
        'chrome_window_name',
        'login_email_locator',
        'login_password_locator',
        'login_button_locator',
        'admin_toggle_locator',
        'staff_id_locator',
        'staff_login_button_locator',
        'close_ad_dialog_locator',
        'close_ad_button_locator',
        'total_product_count_locator',
        'total_product_count_for_registed_locator',
        'product_parent_locator',
        'product_name_inner_locator',
        'product_price_inner_locator',
        'product_image_inner_locator',
        'product_name_for_ed_template',
        'product_price_for_ed_template',
        'product_image_for_ed_template',
        'product_edit_button_template',
        'current_page',
        'next_page_button_template',
        'new_product_page_locator',
        'registered_product_page_locator',
        'current_page_locator',
        'source_button_locator',
        'ck_source_editing_area_locator',
        'option_input_field_locator',
        'title_tab_locator',
        'option_tab_locator',
        'price_tab_locator',
        'tag_tab_locator',
        'thumb_tab_locator',
        'detail_tab_locator',
        'upload_tab_locator',
        'save_button_locator',
    )
    for locator_key in locator_keys:
        setattr(self, locator_key, self.locator_manager.get_locator('BrowserControl', locator_key))

    self.text_templates = self.locator_manager.selectors.get('DetailPageTextTemplates', {})
|
||||
|
||||
|
||||
# # 스레드 종료 시 close_whale_window_if_exists 호출
|
||||
# self.finished.connect(self.cleanup)
|
||||
|
||||
def get_page(self):
    """Return the page object currently held by the controller."""
    current = self.page
    return current
|
||||
|
||||
async def start_browser(self):
    """Launch Chromium, sign in, and open the product list to work on.

    Steps: start Playwright, launch a persistent Chromium context with the
    bundled extension, open the sign-in page, look up the native window
    handle by page title, log in, dismiss the ad dialog if present, then
    navigate to the registered-product page (``ed_mode`` toggle on) or the
    new-product page (toggle off or missing).
    """
    self.logger.debug('크롬 브라우저 실행 중...')

    # Playwright is started manually so the browser is kept alive.
    self.playwright = await async_playwright().start()

    # Bundled files sit next to the executable when packaged with
    # cx_Freeze, otherwise next to this source file.
    if getattr(sys, 'frozen', False):
        base_dir = os.path.dirname(sys.executable)
    else:
        base_dir = os.path.dirname(__file__)
    browser_path = os.path.join(base_dir, 'browsers', 'chromium-1112', 'chrome-win', 'chrome.exe')
    extension_path = os.path.join(base_dir, 'browsers', 'extensions', '1.1.100_0')
    user_data_dir = os.path.join(base_dir, 'browsers', 'user_data')

    self.logger.debug(f"브라우저 경로: {browser_path}")
    self.logger.debug(f"확장 프로그램 경로: {extension_path}")
    self.logger.debug(f"사용자 폴더 경로: {user_data_dir}")

    # Create the user data directory if it does not exist yet.
    if not os.path.exists(user_data_dir):
        os.makedirs(user_data_dir, exist_ok=True)
        self.logger.debug(f"{user_data_dir} 디렉토리가 생성되었습니다.")

    # Pick a user agent at random.
    user_agent = random.choice([
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.0.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/85.0.0.0",
    ])
    self.logger.debug(f"user_agent: {user_agent}")

    # Launch the persistent browser context.
    self.browser = await self.playwright.chromium.launch_persistent_context(
        user_data_dir,
        headless=False,
        permissions=["geolocation", "notifications"],
        geolocation={"latitude": 37.5665, "longitude": 126.9780},
        locale="ko-KR",
        args=[
            '--disable-popup-blocking',
            f'--disable-extensions-except={extension_path}',
            f'--load-extension={extension_path}',
            '--start-maximized',
            '--window-size=1920,1080',
        ],
        executable_path=browser_path,
        user_agent=user_agent,
    )

    # A default page may not exist, so always create our own.
    self.page = await self.browser.new_page()
    self.logger.info('새 페이지 로딩 중...')

    await self.page.goto('https://percenty.co.kr/signin')
    self.logger.info('percenty.co.kr/signin 로딩 완료')

    # Close the first default tab, but never the page we just created
    # (which is pages[0] when the context started without a default tab).
    open_pages = self.browser.pages
    if open_pages and open_pages[0] is not self.page:
        await open_pages[0].close()

    # The page title is used to find the native window.
    page_title = await self.page.title()
    self.logger.debug(f'페이지 제목: {page_title}')

    self.chrome_hwnd = self.find_window_by_title(page_title)
    if not self.chrome_hwnd:
        self.logger.warning('크롬 창을 찾을 수 없습니다.')
    else:
        self.logger.debug(f'크롬 창 핸들: {self.chrome_hwnd}')

    await self.login()
    await self.close_ad_if_exists()

    # A missing 'ed_mode' toggle is treated as off instead of raising KeyError.
    if self.toggle_states.get('ed_mode', False):
        await self.go_to_registered_product_page()
        self.logger.info('등록 상품 관리 페이지로 이동 중...')
    else:
        self.logger.info('신규 상품 등록 페이지로 이동 중...')
        await self.go_to_new_product_page()
|
||||
|
||||
async def login(self):
    """Sign in with the admin or the staff credentials from ``login_infos``.

    ``login_infos['is_admin']`` selects the path. The staff path first
    clicks the admin toggle when its ``aria-checked`` attribute is
    ``"true"``, then fills the admin id, staff id and staff password.
    """
    credentials = self.login_infos
    is_admin = credentials['is_admin']
    account_label = "관리자" if is_admin else "직원"
    self.logger.info(f'로그인 시도 중: {account_label} 계정')

    page = self.page
    if not is_admin:
        # Staff login: switch the form out of admin mode if necessary.
        toggle = page.locator(self.admin_toggle_locator)
        if await toggle.get_attribute("aria-checked") == "true":
            await toggle.click()

        await page.fill(self.login_email_locator, credentials['admin_id'])
        await page.fill(self.staff_id_locator, credentials['user_id'])
        await page.fill(self.login_password_locator, credentials['user_pw'])
        await page.click(self.staff_login_button_locator)
    else:
        # Admin login.
        await page.fill(self.login_email_locator, credentials['admin_id'])
        await page.fill(self.login_password_locator, credentials['admin_pw'])
        await page.click(self.login_button_locator)

    self.logger.info(f'로그인 완료: {account_label} 계정')
|
||||
|
||||
# await self.page.wait_for_load_state('networkidle', timeout=10000)
|
||||
|
||||
async def close_browser(self):
    """Close the browser context and stop Playwright, if a browser exists.

    NOTE(review): the original nesting could not be recovered from the
    source; this assumes every step runs only when ``self.browser`` is
    set — confirm against the repository.
    """
    if not self.browser:
        return

    await self.browser.close()
    await self.playwright.stop()
    self.cleanup()
    self.logger.info('브라우저 종료됨.')
|
||||
|
||||
def find_window_by_title(self, window_name):
    """Return the handle of the first visible window whose title contains ``window_name``.

    Windows are visited through ``win32gui.EnumWindows``; ``None`` is
    returned when no visible window matches.
    """
    matches = []

    def _collect(hwnd, found):
        # Only visible windows whose title contains the text are kept.
        if not win32gui.IsWindowVisible(hwnd):
            return
        if window_name in win32gui.GetWindowText(hwnd):
            found.append(hwnd)

    win32gui.EnumWindows(_collect, matches)
    if matches:
        return matches[0]
    return None
|
||||
|
||||
def switch_to_chrome(self):
    """Restore the Chrome window and bring it to the foreground.

    Logs an error and does nothing else when no window handle is stored.
    """
    hwnd = self.chrome_hwnd
    if not hwnd:
        self.logger.error('크롬 창을 찾을 수 없습니다.')
        return

    win32gui.ShowWindow(hwnd, win32con.SW_RESTORE)
    win32gui.SetForegroundWindow(hwnd)
    self.logger.debug('크롬 창으로 포커스 이동.')
|
||||
|
||||
|
||||
async def get_total_product_count_ori(self):
    """Read the product-count label via a fixed CSS path and return its digits as an int.

    Returns 0 when the element is missing or empty, and also when anything
    raises (including a label with no digits); the error is logged at
    debug level.
    """
    try:
        # Fetch the element's text with JavaScript.
        label_text = await self.page.evaluate('''() => {
            let element = document.querySelector('#root > div > div > div > div > main > div > div.sc-ezreuY.kYrYVh > div.sc-dChVcU.cRrUlt > div.sc-izQBue.dxiUJm > div > div:nth-child(1) > label > span:nth-child(2)');
            return element ? element.innerText : null;
        }''')

        if not label_text:
            self.logger.debug("요소를 찾을 수 없습니다.")
            return 0

        self.logger.debug(f"가져온 텍스트: {label_text}")  # log the raw text
        # Keep only the digit characters of the label.
        digits = ''.join(ch for ch in label_text if ch.isdigit())
        return int(digits)
    except Exception as e:
        self.logger.debug(f"상품 수를 가져오는 중 오류 발생: {e}", exc_info=True)
        return 0
|
||||
|
||||
|
||||
async def get_total_product_count(self):
    """Return the total product count and the page size shown on the list page.

    Returns:
        ``{"total_count": int, "items_per_page": int}``. Both values are 0
        when the total could not be determined or when any exception is
        raised (the exception is logged).
    """
    empty_result = {"total_count": 0, "items_per_page": 0}
    total_count = 0
    items_per_page = 0

    try:
        count_element = await self.page.query_selector("div#root span:has-text('개 상품')")
        page_size_element = await self.page.query_selector("div#root [title$='개씩 보기']")

        self.logger.debug(f"total_count_element : {count_element}")

        if count_element:
            count_text = await count_element.inner_text()
            if "총" in count_text and "개 상품" in count_text:
                total_count = int(''.join(re.findall(r'\d+', count_text)))
                self.logger.info(f"총 상품수 확인: {total_count} 개")

        # Page size comes from the element's title attribute.
        if page_size_element:
            page_size_text = await page_size_element.get_attribute("title")
            if page_size_text and "개씩 보기" in page_size_text:
                items_per_page = int(''.join(re.findall(r'\d+', page_size_text)))
                self.logger.info(f"페이지당 상품수 확인: {items_per_page} 개씩 보기")

        if not total_count:
            return empty_result
        return {"total_count": total_count, "items_per_page": items_per_page}

    except Exception as e:
        self.logger.error(f"상품 수를 가져오는 중 오류 발생: {e}", exc_info=True)
        return {"total_count": 0, "items_per_page": 0}
|
||||
|
||||
|
||||
# def fetch_image_urls_ori(self, html_content):
|
||||
# """
|
||||
# HTML 콘텐츠에서 모든 <img> 태그의 URL을 순서대로 추출하고 중복 제거.
|
||||
# """
|
||||
# soup = BeautifulSoup(html_content, 'html.parser')
|
||||
|
||||
# # 순서를 유지하면서 중복을 제거하기 위해 리스트 사용
|
||||
# image_urls = []
|
||||
# seen_urls = set()
|
||||
|
||||
# # <figure class="image"> 내부의 모든 <img> 태그 찾기
|
||||
# figures = soup.find_all('figure', class_='image')
|
||||
# for figure in figures:
|
||||
# img_tag = figure.find('img')
|
||||
# if img_tag and 'src' in img_tag.attrs:
|
||||
# url = img_tag['src']
|
||||
# if url not in seen_urls:
|
||||
# image_urls.append(url)
|
||||
# seen_urls.add(url) # 중복 방지
|
||||
|
||||
# # class="image_resized"를 가진 모든 <img> 태그 찾기
|
||||
# images_resized = soup.find_all('img', class_='image_resized')
|
||||
# for img in images_resized:
|
||||
# if img and 'src' in img.attrs:
|
||||
# url = img['src']
|
||||
# if url not in seen_urls:
|
||||
# image_urls.append(url)
|
||||
# seen_urls.add(url) # 중복 방지
|
||||
|
||||
# return image_urls
|
||||
|
||||
def fetch_image_urls(self, html_content):
    """Return the ``src`` of every ``<img>`` tag in ``html_content``, in document order.

    Tags without a ``src`` attribute are skipped; duplicates are kept.
    Both the count and the full list are written to the debug log.

    NOTE(review): the original nesting could not be recovered from the
    source; this assumes both debug logs run once, after the loop.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Covers <img> inside <figure> as well as standalone <img> tags.
    image_urls = [tag['src'] for tag in soup.find_all('img') if 'src' in tag.attrs]
    self.logger.debug(f"fetch_image_urls 에서 추출한 이미지URL 갯수 : {len(image_urls)} 개")

    self.logger.debug(f"fetch_image_urls 에서 추출한 이미지URL 목록 : {image_urls}")

    return image_urls
|
||||
|
||||
async def close_ad_if_exists(self):
    """Dismiss the ad dialog when it shows up within five seconds.

    A timeout while waiting for the dialog is logged at info level; any
    other exception is logged as an error. Nothing is raised to the caller.
    """
    try:
        # Wait until the dialog becomes visible.
        await self.page.wait_for_selector(self.close_ad_dialog_locator, timeout=5000, state='visible')
        self.logger.info("다이얼로그가 발견되었습니다. 닫기 버튼을 클릭합니다.")

        dismiss_button = await self.page.query_selector(self.close_ad_button_locator)
        if not dismiss_button:
            self.logger.warning("닫기 버튼을 찾지 못했습니다.")
            return

        await dismiss_button.click()
        self.logger.info("다이얼로그를 성공적으로 닫았습니다.")

    except TimeoutError:
        # No dialog appeared.
        self.logger.info("다이얼로그가 발견되지 않았습니다. 타임아웃이 발생했습니다.")
    except Exception as e:
        self.logger.error(f"다이얼로그 닫기 중 오류 발생: {e}", exc_info=True)
|
||||
|
||||
async def go_to_new_product_page(self):
    """Click the link to the new-product registration page.

    The locator is looked up from the locator manager on every call.
    Failures are logged with a traceback and swallowed.
    """
    try:
        await self.page.click(
            self.locator_manager.get_locator('BrowserControl', 'new_product_page_locator')
        )
        self.logger.info("신규 상품 등록 페이지로 이동 완료.")
    except Exception as nav_error:
        self.logger.error(f"신규 상품 등록 페이지 이동 중 오류: {nav_error}", exc_info=True)
|
||||
|
||||
|
||||
async def go_to_registered_product_page(self):
    """Click the link to the registered-product management page.

    The locator is looked up from the locator manager on every call.
    Failures are logged with a traceback and swallowed.
    """
    try:
        await self.page.click(
            self.locator_manager.get_locator('BrowserControl', 'registered_product_page_locator')
        )
        self.logger.info("등록 상품 관리 페이지로 이동 완료.")
    except Exception as nav_error:
        self.logger.error(f"등록 상품 관리 페이지 이동 중 오류: {nav_error}", exc_info=True)
|
||||
|
||||
|
||||
# async def get_product_edit_buttons(self):
|
||||
# """현재 페이지의 세부사항 수정 및 업로드 버튼을 찾기"""
|
||||
# try:
|
||||
# # 버튼 선택자를 가져옴
|
||||
# edit_button_selector = self.product_edit_button
|
||||
|
||||
# if not edit_button_selector:
|
||||
# self.logger.warning("상품 수정 버튼의 선택자를 찾을 수 없습니다.")
|
||||
# return []
|
||||
|
||||
# # 선택자를 사용해 버튼 객체를 찾음
|
||||
# buttons = self.page.locator(edit_button_selector)
|
||||
|
||||
# # 버튼이 존재하는지 확인
|
||||
# if await buttons.count() == 0:
|
||||
# self.logger.warning("세부사항 수정 및 업로드 버튼을 찾을 수 없습니다.")
|
||||
# return []
|
||||
|
||||
# count = await buttons.count()
|
||||
# self.logger.info(f"수정할 상품 개수: {count}")
|
||||
|
||||
# # 모든 버튼을 리스트로 반환
|
||||
# return [buttons.nth(i) for i in range(count)]
|
||||
|
||||
# except Exception as e:
|
||||
# self.logger.error(f"상품 수정 버튼을 찾는 중 오류: {e}", exc_info=True)
|
||||
# return []
|
||||
|
||||
async def is_button_disabled(self, button):
    """Return True when ``button`` carries a ``disabled`` attribute.

    If reading the attribute raises, the error is logged and False is
    returned, i.e. the button is treated as enabled.
    """
    try:
        return (await button.get_attribute('disabled')) is not None
    except Exception as e:
        self.logger.error(f"상품 수정 버튼 상태 확인 중 오류 발생: {e}", exc_info=True)
        return False
|
||||
|
||||
async def get_product_edit_buttons_by_templete(self):
    """Return one locator per product edit button on the current page.

    The buttons are matched with ``self.product_edit_button_template``.

    Returns:
        A list of ``buttons.nth(i)`` locators, or an empty list when no
        button matches or when an exception is raised (which is logged).
    """
    try:
        # Locate every edit button with the configured selector.
        buttons = self.page.locator(self.product_edit_button_template)

        button_count = await buttons.count()
        if button_count == 0:
            self.logger.warning("세부사항 수정 및 업로드 버튼을 찾을 수 없습니다.")
            return []

        self.logger.info(f"현재 페이지의 수정할 상품 개수: {button_count}")

        # Hand back each button as its own locator.
        return [buttons.nth(i) for i in range(button_count)]

    except Exception as e:
        self.logger.error(f"상품 수정 버튼을 찾는 중 오류: {e}", exc_info=True)
        return []
|
||||
|
||||
|
||||
async def click_modify_button_by_text(self, index):
    """Click the ``index``-th product edit button with a JavaScript click.

    Args:
        index: Position substituted into the selector ``(<template>)[index]``.

    A missing button is logged as a warning; any exception is logged with
    its traceback and swallowed.
    """
    try:
        # Build the indexed selector from the configured template.
        button_template = self.locator_manager.get_locator('BrowserControl', 'product_edit_button_template')
        button_selector = f'({button_template})[{index}]'

        button = await self.page.query_selector(button_selector)

        if button:
            # Scroll the button into view, then click it from JavaScript.
            await button.scroll_into_view_if_needed()
            # Playwright passes the handle as the function's parameter;
            # the Selenium-style ``arguments[0]`` is not defined here.
            await self.page.evaluate('(element) => element.click()', button)
            self.logger.info(f'{index}번째 상품의 수정 버튼 클릭 완료')
        else:
            self.logger.warning(f'{index}번째 상품의 수정 버튼을 찾지 못했습니다.')
    except Exception as e:
        self.logger.error(f'{index}번째 상품의 수정 버튼 클릭 중 오류: {str(e)}', exc_info=True)
|
||||
|
||||
|
||||
async def open_product_edit_dialog(self, button):
    """Click ``button`` and wait for the dialog's tab bar to appear.

    The button is scrolled into view first; after the click the method
    waits for ``div.ant-tabs-nav``. Failures are logged and swallowed.
    """
    try:
        # Bring the button on screen before clicking it.
        await button.scroll_into_view_if_needed()
        self.logger.debug("상품의 '세부사항 수정 및 업로드' 버튼을 화면에 보이도록 스크롤.")

        await button.click()
        self.logger.info("세부사항 수정 다이얼로그 열기 완료.")

        # Wait until the dialog has finished loading.
        tab_bar_selector = 'div.ant-tabs-nav'
        await self.page.wait_for_selector(tab_bar_selector)
    except Exception as open_error:
        self.logger.error(f"세부사항 수정 다이얼로그 열기 중 오류: {open_error}", exc_info=True)
|
||||
|
||||
async def click_detail_tab(self):
    """Click the detail-page tab; failures are logged and swallowed."""
    try:
        detail_tab = self.detail_tab_locator
        await self.page.click(detail_tab)
        self.logger.info("상세페이지 탭 클릭 완료.")
    except Exception as click_error:
        self.logger.error(f"상세페이지 탭 클릭 중 오류: {click_error}", exc_info=True)
|
||||
|
||||
async def click_option_tab(self):
    """Click the option tab; failures are logged and swallowed."""
    try:
        option_tab = self.option_tab_locator
        await self.page.click(option_tab)
        self.logger.info("옵션 탭 클릭 완료.")
    except Exception as click_error:
        self.logger.error(f"옵션 탭 클릭 중 오류: {click_error}", exc_info=True)
|
||||
|
||||
async def click_price_tab(self):
    """Click the price tab; failures are logged and swallowed."""
    try:
        price_tab = self.price_tab_locator
        await self.page.click(price_tab)
        self.logger.info("가격 탭 클릭 완료.")
    except Exception as click_error:
        self.logger.error(f"가격 탭 클릭 중 오류: {click_error}", exc_info=True)
|
||||
|
||||
async def click_title_tab(self):
    """Click the product-name tab; failures are logged and swallowed."""
    try:
        title_tab = self.title_tab_locator
        await self.page.click(title_tab)
        self.logger.info("상품명 탭 클릭 완료.")
    except Exception as click_error:
        self.logger.error(f"상품명 탭 클릭 중 오류: {click_error}", exc_info=True)
|
||||
|
||||
def generate_restored_html(self, urls):
    """Convert image URLs into editor HTML, one square-aspect ``<figure>`` per URL.

    NOTE(review): a second ``generate_restored_html`` is defined later in
    the same class body; in Python the later definition replaces this one,
    so this version appears to be unreachable — confirm and remove.
    """
    figures = [
        f'<figure class="image"><img src="{url}" style="aspect-ratio:1/1;"></figure>\n'
        for url in urls
    ]
    return '<p> </p>' + ''.join(figures)
|
||||
|
||||
def deleted_img_urls_from_logs(self):
    """Parse the recovery logs into ``{product name: [image URLs]}``.

    Each file named in ``self.log_files`` is read from the ``recovery_log``
    directory next to this module; missing files are skipped. A line
    holding an original product name starts a new (empty) entry, and the
    next image-URL-list line fills it. Further URL lists are ignored until
    another product name is seen.
    """
    product_pattern = re.compile(r"원본 상품명 '(.+?)'")
    url_list_pattern = re.compile(r"fetch_image_urls 에서 추출한 이미지URL 목록 : \[(.+?)\]")

    recovered = {}
    log_dir = os.path.join(os.path.dirname(__file__), "recovery_log")

    for file_name in self.log_files:
        log_path = os.path.join(log_dir, file_name)
        if not os.path.exists(log_path):
            continue

        with open(log_path, 'r', encoding='utf-8') as log_file:
            lines = log_file.readlines()

        pending_product = None
        for line in lines:
            # A product-name line opens a fresh entry.
            name_hit = product_pattern.search(line)
            if name_hit:
                pending_product = name_hit.group(1)
                recovered[pending_product] = []

            # A URL-list line belongs to the most recent product name.
            urls_hit = url_list_pattern.search(line)
            if urls_hit and pending_product:
                # Strip the quotes left over from the logged list repr.
                recovered[pending_product].extend(
                    piece.strip("'\"") for piece in urls_hit.group(1).split(", ")
                )
                pending_product = None  # one URL list per product name

    self.logger.debug(f"복구된 이미지 URL 데이터: {recovered}")
    return recovered
|
||||
|
||||
async def recovery_image_urls(self, product_name, deleted_img_urls):
    """Restore a product's detail images from logged URLs when none remain.

    Args:
        product_name: Key looked up in ``deleted_img_urls``.
        deleted_img_urls: Mapping of product name to a list of image URLs.

    When the product is in the mapping, the editor is switched to source
    mode and the current HTML is read from the source area's ``data-value``
    attribute. If it contains no ``<img>`` tag, HTML rebuilt from the logged
    URLs is written back to that attribute. Source mode is then toggled
    off again.
    """
    self.logger.debug("상품명과 삭제된 이미지 URL 데이터를 이용해 복구 작업을 수행하는 메서드")

    if product_name not in deleted_img_urls:
        self.logger.debug(f"로그에 해당 상품명 '{product_name}'에 대한 데이터가 존재하지 않습니다.")
        return

    # Switch the editor to source mode.
    source_button_locator = self.locator_manager.get_locator('BrowserControl', 'source_button_locator')
    ck_source_editing_area_locator = self.locator_manager.get_locator('BrowserControl', 'ck_source_editing_area_locator')
    await self.page.click(source_button_locator)
    self.logger.debug("recovery_image_urls : 소스 버튼 클릭 완료.")

    # Read the current HTML source.
    textarea = await self.page.wait_for_selector(ck_source_editing_area_locator, timeout=5000)
    data_value = await textarea.get_attribute("data-value")

    # A missing attribute is treated as empty HTML instead of handing
    # None to the parser.
    image_urls = self.fetch_image_urls(data_value or "")
    self.logger.info(f'recovery_image_urls추출된 이미지 URL 수: {len(image_urls)}')

    if len(image_urls) == 0:
        restored_html = self.generate_restored_html(deleted_img_urls[product_name])
        # The selector and the HTML are passed as data, not spliced into
        # the script text, so backticks, "${" or quotes inside them
        # cannot break or alter the JavaScript.
        await self.page.evaluate(
            '([selector, html]) => document.querySelector(selector).setAttribute("data-value", html)',
            [ck_source_editing_area_locator, restored_html],
        )
        self.logger.debug("recovery_image_urls로그 데이터를 이용하여 HTML 복원 완료.")
    else:
        self.logger.debug("이미 이미지가 있으므로 복원 작업을 패스합니다.")

    # Leave source mode.
    await self.page.click(source_button_locator)
    self.logger.debug('소스 버튼 재 클릭 완료.')
|
||||
|
||||
def generate_restored_html(self, urls):
    """Render a list of image URLs as editor HTML.

    Each URL becomes a ``<figure class="image">`` element. When
    ``self.get_image_size`` reports both dimensions, the ``<img>`` also
    carries ``width``, ``height`` and a matching ``aspect-ratio`` style;
    otherwise a bare ``<img src=...>`` is emitted.

    Args:
        urls: Iterable of image URL strings.

    Returns:
        The HTML string, always starting with a leading paragraph line.
    """
    # Collect fragments and join once instead of growing a string with +=.
    fragments = ['<p> </p>\n']
    for url in urls:
        width, height = self.get_image_size(url)
        if width and height:
            fragments.append(
                f'<figure class="image">'
                f'<img style="aspect-ratio:{width}/{height};" src="{url}" width="{width}" height="{height}">'
                f'</figure>\n'
            )
        else:
            # Size could not be determined: fall back to the plain form.
            fragments.append(f'<figure class="image"><img src="{url}"></figure>\n')
    return ''.join(fragments)
|
||||
|
||||
def get_image_size(self, url):
    """Download an image and return its pixel dimensions.

    Args:
        url: Image URL; surrounding single/double quotes are stripped.

    Returns:
        ``(width, height)`` on success, ``(None, None)`` on any failure
        (the failure is logged as a warning).
    """
    # Bound before the try block: the handler below formats this name, and
    # it must exist even when the strip itself fails (e.g. url is None).
    cleaned_url = url
    try:
        cleaned_url = url.strip("'\"")

        response = requests.get(cleaned_url, timeout=5)
        response.raise_for_status()
        # Context manager releases the decoder resources deterministically.
        with Image.open(BytesIO(response.content)) as image:
            return image.width, image.height
    except Exception as e:
        self.logger.warning(f"이미지 크기 확인 실패 - {cleaned_url}: {e}")
        return None, None
|
||||
|
||||
|
||||
async def extract_image_urls(self, optionHandler, is_option_data=False):
    """Extract image URLs from the detail-page editor and clear its source.

    Steps: toggle the editor into source mode, read the ``data-value``
    attribute of the source textarea, extract image URLs from it with
    ``self.fetch_image_urls``, blank the ``data-value`` attribute, and
    toggle source mode off. When ``is_option_data`` is true, the configured
    leading texts (and, if enabled, an option list) are typed into the
    option input field afterwards.

    Args:
        optionHandler: Provides ``get_selected_translated_options()`` and
            ``option_info``; only used when ``is_option_data`` is true.
        is_option_data: Whether to type the option notes after extraction.

    Returns:
        The extracted image URLs. On error, whatever was extracted before
        the failure, or an empty list.
    """
    # Bound up front so the error path below can always return a list,
    # even when the failure happens before extraction.
    image_urls = []
    try:
        source_button_locator = self.locator_manager.get_locator('BrowserControl', 'source_button_locator')
        ck_source_editing_area_locator = self.locator_manager.get_locator('BrowserControl', 'ck_source_editing_area_locator')

        # Enter source-editing mode.
        await self.page.click(source_button_locator)
        self.logger.debug("소스 버튼 클릭 완료.")

        # The HTML source lives in the textarea's 'data-value' attribute.
        textarea = await self.page.wait_for_selector(ck_source_editing_area_locator, timeout=5000)
        data_value = await textarea.get_attribute("data-value")

        image_urls = self.fetch_image_urls(data_value)
        self.logger.info(f'추출된 이미지 URL 수: {len(image_urls)}')

        # Blank the source so the existing images are removed.
        self.logger.debug('img 태그를 삭제 중...')
        data_value_element = await self.page.query_selector(ck_source_editing_area_locator)
        if data_value_element:
            # The selector is passed as an argument rather than interpolated
            # into the script, so quotes inside it cannot break the JS.
            await self.page.evaluate(
                "(selector) => document.querySelector(selector).setAttribute('data-value', '')",
                ck_source_editing_area_locator,
            )
            updated_value = await data_value_element.get_attribute('data-value')
            self.logger.debug(f'Updated data-value: {updated_value}')
        else:
            self.logger.debug('Element with data-value not found.')
        self.logger.debug('img 태그 삭제 완료.')

        # Leave source-editing mode.
        await self.page.click(source_button_locator)
        self.logger.debug('소스 버튼 재 클릭 완료.')

        if is_option_data:
            self.logger.debug('옵션 데이터 입력 시작')
            option_data = optionHandler.get_selected_translated_options()
            is_single = optionHandler.option_info['is_single_option']

            # NOTE: typing the option list is deliberately disabled for now
            # by forcing the single-option path.
            is_single = True
            self.logger.debug('옵션입력 일단 제외')

            self.logger.debug('가져온 옵션 데이터')
            self.logger.debug(f'{option_data}')

            input_field = await self.page.wait_for_selector(self.option_input_field_locator, timeout=5000)
            await input_field.press('Enter')

            # Type every non-empty template whose key contains 'leading_text',
            # in sorted key order.
            for key in sorted(self.text_templates):
                leading_text = self.text_templates[key]
                if 'leading_text' in key and leading_text:
                    await input_field.type(leading_text)
                    await input_field.press('Enter')
                    self.logger.info(f"{key} 텍스트 입력 완료: {leading_text}")

            if not is_single:
                self.logger.info('단일옵션이 아니므로 옵션목록을 입력')

                await input_field.type("# 옵션 목록")
                await input_field.press('Enter')

                # One translated option name per line; only the first line
                # carries the "- " list marker. Iterating the values also
                # copes with an empty mapping instead of raising IndexError.
                for number, value in enumerate(option_data.values(), start=1):
                    marker = "- " if number == 1 else ""
                    await input_field.type(f"{marker}{number}. {value}")
                    await input_field.press('Enter')

                # Two extra Enter presses terminate the list.
                await input_field.press('Enter')
                await input_field.press('Enter')

                # Trailing notice.
                await input_field.type('### 나열된 옵션목록 이외의 옵션이 필요하실 경우 고객센터로 연락주세요.')
                await input_field.press('Enter')
                await input_field.type('---')
                await input_field.press('Enter')

            self.logger.info('옵션 데이터 입력 완료')

        return image_urls
    except Exception as e:
        self.logger.error(f"이미지 URL 추출 & 옵션데이터 입력 처리 중 오류: {e}", exc_info=True)
        return image_urls or []
|
||||
|
||||
def paste_image_in_chrome(self, clipboardImageManager, url, is_success_translated, toggle_states, is_watermark=False, watermark_text=""):
    """Focus Chrome and paste the processed clipboard image into it.

    The clipboard content is first processed by ``clipboardImageManager``;
    if the clipboard then holds an image it is pasted with Ctrl+V, the
    Right-arrow key is pressed, and the clipboard is cleared.

    Args:
        clipboardImageManager: Object that processes and inspects the clipboard.
        url: Original image URL, forwarded to ``process_clipboard``.
        is_success_translated: Forwarded to ``process_clipboard``.
        toggle_states: Forwarded to ``process_clipboard``.
        is_watermark: Accepted but not used by this method.
        watermark_text: Accepted but not used by this method.

    Returns:
        True when an image was pasted; False when the clipboard held no
        image or any step raised.
    """
    self.logger.debug("크롬으로 포커스를 옮기고 클립보드의 이미지를 붙여넣고 엔터 입력")
    try:
        self.switch_to_chrome()
        clipboardImageManager.process_clipboard(
            original_url=url,
            is_success_translated=is_success_translated,
            toggle_states=toggle_states,
        )

        # Guard clause: nothing to paste.
        if not clipboardImageManager.is_clipboard_image():
            self.logger.warning("클립보드가 비어있습니다.")
            return False

        pyautogui.hotkey('ctrl', 'v')
        self.logger.info("이미지 붙여넣기 완료.")
        pyautogui.press('right')
        self.logger.debug("이미지 붙여넣기 완료.")
        clipboardImageManager.clear_clipboard()
        self.logger.info("이미지 붙여넣기 완료로 클립보드 비우기.")
        return True
    except Exception as e:
        self.logger.error(f"이미지 붙여넣기 중 오류: {e}", exc_info=True)
        return False
|
||||
|
||||
async def save_and_ecs_product_edit(self):
    """Click the save button, then press Escape; failures are only logged."""
    try:
        browser_page = self.page
        await browser_page.click(self.save_button_locator)
        await browser_page.keyboard.press("Escape")
        self.logger.info("상품 수정 내용 저장 및 ECS 완료.")
    except Exception as e:
        self.logger.error(f"저장 버튼 클릭 중 오류: {e}", exc_info=True)
|
||||
|
||||
async def save_product_edit(self):
    """Click the save button of the product editor; failures are only logged."""
    try:
        save_selector = self.save_button_locator
        await self.page.click(save_selector)
        self.logger.info("상품 수정 내용 저장 완료.")
    except Exception as e:
        self.logger.error(f"저장 버튼 클릭 중 오류: {e}", exc_info=True)
|
||||
|
||||
async def go_to_next_page(self, wait_after_click=3):
    """Click the pagination button for the page after the active one.

    The active page number is read from the ``title`` attribute of the
    element matched by ``self.current_page_locator``; the next button is
    located through ``self.next_page_button_template``.

    Args:
        wait_after_click: Seconds to wait after clicking so the next page
            can load. Defaults to the previous hard-coded 3 seconds.

    Returns:
        True when the next-page button was clicked; False when the current
        page or the next button could not be found, or on any error.
    """
    try:
        current_page = await self.page.query_selector(self.current_page_locator)
        if not current_page:
            self.logger.warning("현재 페이지 정보를 찾을 수 없습니다.")
            return False

        current_page_number = int(await current_page.get_attribute("title"))
        self.logger.info(f"현재페이지 : [{current_page_number}]")

        next_page_number = current_page_number + 1
        next_page_button_locator = self.next_page_button_template.format(page_number=next_page_number)
        next_page_button = await self.page.query_selector(next_page_button_locator)

        if not next_page_button:
            self.logger.warning("다음 페이지가 없습니다.")
            return False

        await next_page_button.click()
        # Yield to the event loop while waiting; a blocking time.sleep()
        # here would stall every other coroutine on this loop.
        await asyncio.sleep(wait_after_click)
        self.logger.info(f"페이지 {next_page_number}로 이동 완료.")
        return True

    except Exception as e:
        self.logger.error(f"다음 페이지로 이동 중 오류 발생: {e}", exc_info=True)
        return False
|
||||
|
||||
def switch_to_chrome(self):
    """Bring the Chrome window to the foreground.

    The window handle is looked up by title on first use and cached in
    ``self.chrome_hwnd``. Errors are logged, never raised.
    """
    try:
        if not self.chrome_hwnd:
            self.chrome_hwnd = self.find_window_by_title(self.chrome_window_name)

        if self.chrome_hwnd:
            win32gui.ShowWindow(self.chrome_hwnd, win32con.SW_RESTORE)
            win32gui.SetForegroundWindow(self.chrome_hwnd)
            self.logger.debug('크롬 창으로 포커스 이동.')
        else:
            self.logger.warning('크롬 창을 찾을 수 없습니다.')
    except Exception as e:
        # Drop the cached handle so the next call looks the window up again;
        # otherwise a handle that went stale would fail the same way forever.
        self.chrome_hwnd = None
        self.logger.error(f"크롬 포커스 전환 중 오류: {e}", exc_info=True)
|
||||
|
||||
async def scroll_with_wheel(self, direction="down", pause_time=0.5, max_scrolls=50):
    """Scroll the page in 1000px steps, pausing between steps.

    Scrolling down stops early when ``document.body.scrollHeight`` did not
    change after a step. Scrolling up stops early once the vertical scroll
    offset is 0.

    Args:
        direction: "down" or "up".
        pause_time: Seconds to wait after each step.
        max_scrolls: Upper bound on the number of steps.

    Raises:
        ValueError: If ``direction`` is neither "down" nor "up".
    """
    # Validate once, before touching the page.
    if direction not in ("down", "up"):
        raise ValueError("direction 인자는 'down' 또는 'up'만 허용됩니다.")

    scroll_count = 0
    self.logger.debug("스크롤 시작")

    last_height = await self.page.evaluate("document.body.scrollHeight")
    self.logger.debug(f"현재 페이지 높이 가져오기 - {last_height}")

    while scroll_count < max_scrolls:
        if direction == "down":
            self.logger.debug(f"scroll_count[{scroll_count}]회 : 휠 아래로 1000px")
            await self.page.evaluate("window.scrollBy(0, 1000);")
        else:
            self.logger.debug(f"scroll_count[{scroll_count}]회 : 휠 위로 1000px")
            await self.page.evaluate("window.scrollBy(0, -1000);")

        self.logger.debug(f"pause_time 슬립 : {pause_time}")
        await asyncio.sleep(pause_time)

        new_height = await self.page.evaluate("document.body.scrollHeight")
        self.logger.debug(f"새로운 페이지 높이 가져오기 - {new_height}")

        if direction == "down" and new_height == last_height:
            self.logger.debug(f"페이지 끝에 도달했습니다. 스크롤 횟수: {scroll_count}")
            break
        if direction == "up":
            # The document height never drops to 0, so the top has to be
            # detected from the scroll offset instead.
            offset = await self.page.evaluate("window.pageYOffset")
            if offset == 0:
                self.logger.debug(f"페이지 시작에 도달했습니다. 스크롤 횟수: {scroll_count}")
                break

        self.logger.debug(f"새로운 페이지 높이를 현재높이로 재설정 - {new_height}")
        last_height = new_height
        scroll_count += 1
        self.logger.debug("스크롤 카운트 + 1")

    if scroll_count == max_scrolls:
        self.logger.debug("최대 스크롤 횟수에 도달했습니다.")
|
||||
|
||||
async def scroll_with_keyboard(self, direction="down", pause_time=0.5, max_scrolls=50):
    """Scroll the page by pressing PageDown / PageUp repeatedly.

    Args:
        direction: "down" presses PageDown, "up" presses PageUp.
        pause_time: Seconds to wait after each key press.
        max_scrolls: Number of key presses to perform.

    Raises:
        ValueError: If ``direction`` is neither "down" nor "up" (raised on
            the first iteration).
    """
    presses_done = 0

    while presses_done < max_scrolls:
        # Resolve the key for this step; None marks an unsupported direction.
        key_name = "PageDown" if direction == "down" else "PageUp" if direction == "up" else None
        if key_name is None:
            raise ValueError("direction 인자는 'down' 또는 'up'만 허용됩니다.")

        await self.page.keyboard.press(key_name)
        await asyncio.sleep(pause_time)
        presses_done += 1

    if presses_done == max_scrolls:
        self.logger.debug("최대 스크롤 횟수에 도달했습니다.")
|
||||
|
||||
|
||||
async def collect_product_info(self, items_per_page, ed_mode):
    """Collect name, price and image URL for the products on the page.

    In ``ed_mode`` each product is addressed through per-index selectors
    built from the ``*_for_ed_template`` attributes (indices start at 2).
    Otherwise every element matching ``self.product_parent_locator`` is a
    product and the inner locators are resolved inside that element.

    Args:
        items_per_page: Maximum number of products to read.
        ed_mode: Whether to use the per-index selector templates.

    Returns:
        A tuple ``(product_infos, product_name_elements)``: a list of dicts
        with keys ``name``, ``price`` and ``image_url``, and the matching
        name elements. Products that fail to resolve are skipped. On an
        unexpected error both lists are empty.
    """
    try:
        product_infos = []
        product_name_elements = []

        if ed_mode:
            product_elements = [
                {
                    "name": self.product_name_for_ed_template.format(index=i),
                    "price": self.product_price_for_ed_template.format(index=i),
                    "image": self.product_image_for_ed_template.format(index=i),
                }
                for i in range(2, items_per_page + 2)
            ]
        else:
            product_elements = await self.page.query_selector_all(self.product_parent_locator)

        for i, element in enumerate(product_elements[:items_per_page], start=1):
            try:
                if ed_mode:
                    # Page-wide lookup with this product's own selectors.
                    scope = self.page
                    name_selector = element["name"]
                    price_selector = element["price"]
                    image_selector = element["image"]
                else:
                    # Resolve the inner locators inside THIS product element.
                    # Querying the page instead would return the first match
                    # on the page for every product.
                    scope = element
                    name_selector = self.product_name_inner_locator
                    price_selector = self.product_price_inner_locator
                    image_selector = self.product_image_inner_locator

                product_name_element = await scope.wait_for_selector(name_selector, timeout=3000, state="attached")
                product_price_element = await scope.wait_for_selector(price_selector, timeout=3000, state="attached")
                product_image_element = await scope.wait_for_selector(image_selector, timeout=3000, state="attached")

                self.logger.debug(f"product_name_element : {product_name_element}")
                self.logger.debug(f"product_price_element : {product_price_element}")
                self.logger.debug(f"product_image_element : {product_image_element}")

                if product_name_element and product_price_element and product_image_element:
                    product_info = {
                        "name": (await product_name_element.inner_text()).strip(),
                        "price": (await product_price_element.inner_text()).strip(),
                        "image_url": await product_image_element.get_attribute('src'),
                    }
                    self.logger.debug(f"상품 {i}: {product_info}")
                    product_infos.append(product_info)
                    product_name_elements.append(product_name_element)

            except Exception as e:
                self.logger.error(f"상품 {i} 정보 수집 중 오류 발생: {e}", exc_info=True)
                continue

        return product_infos, product_name_elements

    except Exception as e:
        self.logger.error(f"상품 정보 수집 중 오류 발생: {e}", exc_info=True)
        # Same shape as the success path so callers can always unpack.
        return [], []
|
||||
|
||||
|
||||
|
||||
|
||||
async def scroll_page_to_bottom(self, pause_time=0.2):
    """Scroll down one viewport at a time until the document height stops changing.

    Args:
        pause_time: Seconds to wait after each scroll step.
    """
    # NOTE(review): the loop has no iteration cap; a page whose height keeps
    # growing would keep this running. Confirm whether a limit is wanted.
    self.logger.info('페이지 스크롤 시작...')
    height_probe = "() => document.body.scrollHeight"
    known_height = await self.page.evaluate(height_probe)

    still_growing = True
    while still_growing:
        await self.page.evaluate("window.scrollBy(0, window.innerHeight);")
        await asyncio.sleep(pause_time)
        measured_height = await self.page.evaluate(height_probe)
        # An unchanged height after a step ends the loop.
        still_growing = measured_height != known_height
        known_height = measured_height

    self.logger.info('페이지 스크롤 완료.')
|
||||
|
||||
async def scroll_page_to_top(self, pause_time=0.2):
    """Scroll up one viewport at a time until the scroll offset is 0 or stops changing.

    Args:
        pause_time: Seconds to wait after each scroll step.
    """
    self.logger.info('페이지 위로 스크롤 시작...')
    offset_probe = "() => window.pageYOffset"
    offset = await self.page.evaluate(offset_probe)

    while offset > 0:
        await self.page.evaluate("window.scrollBy(0, -window.innerHeight);")
        await asyncio.sleep(pause_time)
        refreshed_offset = await self.page.evaluate(offset_probe)
        moved = refreshed_offset != offset
        offset = refreshed_offset
        if not moved:
            # The offset did not change: nothing left to scroll.
            break

    self.logger.info('페이지 위로 스크롤 완료.')
|
||||
|
||||
|
||||
def run(self):
    """Run ``self.start_browser()`` to completion on a fresh asyncio event loop."""
    browser_coroutine = self.start_browser()
    asyncio.run(browser_coroutine)
|
||||
|
||||
def terminate(self):
    """Run cleanup, then terminate the thread through the parent class."""
    self.logger.info("크롬 스레드 종료")
    try:
        self.cleanup()
    finally:
        # Terminate even when cleanup raises; the cleanup error still
        # propagates to the caller afterwards.
        super().terminate()
|
||||
|
||||
def cleanup(self):
    """Close the Whale browser window if a Whale browser object is attached."""
    # NOTE(review): the visible constructor code only shows a
    # `whale_translator` attribute; confirm where `whale_browser` is set.
    # getattr keeps cleanup from raising if it was never assigned.
    whale_browser = getattr(self, "whale_browser", None)
    if whale_browser:
        self.logger.info("Whale 브라우저 창 닫기 시도 중...")
        whale_browser.close_whale_window_if_exists()
|
||||
|
||||
|
|
@ -202,7 +202,7 @@ class ClipboardImageManager:
|
|||
self.logger.debug("이미지 다운로드 최대 재시도 횟수를 초과했습니다.")
|
||||
return None
|
||||
|
||||
def process_clipboard(self, original_url, is_success_translated, toggle_states, path=None):
|
||||
def process_clipboard(self, original_url, is_success_translated, toggle_states, path=None, is_thumb=False):
|
||||
"""클립보드의 내용을 처리하고, 필요한 경우 이미지 변환, 크롭 또는 클립보드 비우기"""
|
||||
|
||||
try:
|
||||
|
|
@ -232,10 +232,10 @@ class ClipboardImageManager:
|
|||
# 가로 크기가 200픽셀 이상이면 크롭
|
||||
if width >= 200:
|
||||
self.logger.debug("이미지 가로 크기 200픽셀 이상: 크롭 진행 중...")
|
||||
cropped_image = self.crop_image(image) # 크롭 메서드 사용
|
||||
cropped_image = self.crop_image(image, is_thumb) # 크롭 메서드 사용
|
||||
|
||||
# 워터마크 추가
|
||||
if is_watermark:
|
||||
if is_watermark and not is_thumb: # is_thumb가 True라면 워터마크 추가를 건너뜁니다
|
||||
self.logger.debug("워터마크 추가 중...")
|
||||
cropped_watermark_image = self.add_watermark(cropped_image, watermark_text, opacity_percent) # 워터마크 추가
|
||||
cropped_image = cropped_watermark_image
|
||||
|
|
@ -260,10 +260,10 @@ class ClipboardImageManager:
|
|||
|
||||
if width >= 200:
|
||||
self.logger.debug("이미지 가로 크기 200픽셀 이상: 크롭 진행 중...")
|
||||
cropped_image = self.crop_image(image) # 크롭 메서드 사용
|
||||
cropped_image = self.crop_image(image, is_thumb) # 크롭 메서드 사용
|
||||
|
||||
# 워터마크 추가
|
||||
if is_watermark:
|
||||
if is_watermark and not is_thumb: # is_thumb가 True라면 워터마크 추가를 건너뜁니다
|
||||
self.logger.debug("워터마크 추가 중...")
|
||||
cropped_watermark_image = self.add_watermark(cropped_image, watermark_text, opacity_percent) # 워터마크 추가
|
||||
cropped_image = cropped_watermark_image
|
||||
|
|
@ -342,8 +342,12 @@ class ClipboardImageManager:
|
|||
finally:
|
||||
win32clipboard.CloseClipboard()
|
||||
|
||||
def crop_image(self, image, crop_percentage=0.01):
|
||||
def crop_image(self, image, is_thumb=False, crop_percentage=0.01):
|
||||
"""이미지를 주어진 퍼센트만큼 크롭하는 함수"""
|
||||
if is_thumb:
|
||||
crop_percentage = 0.03
|
||||
self.logger.debug(f"썸네일 이미지 이므로 크롭 3%로 조정")
|
||||
|
||||
width, height = image.size
|
||||
left = width * crop_percentage
|
||||
top = height * crop_percentage
|
||||
|
|
|
|||
9
gui.py
9
gui.py
|
|
@ -503,6 +503,8 @@ class AutoPercentyGUI(QWidget):
|
|||
"""QSettings에 토글 상태 저장"""
|
||||
for key, value in self.toggle_states.items():
|
||||
self.settings.setValue(f"toggle/{key}", value)
|
||||
# 상태가 변경되었을 때 UI를 업데이트
|
||||
self.update_toggle_ui(key)
|
||||
|
||||
def update_toggle_ui(self, key):
|
||||
"""토글 상태에 따라 UI 업데이트"""
|
||||
|
|
@ -753,6 +755,7 @@ class AutoPercentyGUI(QWidget):
|
|||
self.browser_controller.toggle_states = {
|
||||
'optionIMGTrans': self.toggle_states['optionIMGTrans'],
|
||||
'detail_IMGTrans': self.toggle_states['detail_IMGTrans'],
|
||||
'thumb': self.toggle_states['thumb'],
|
||||
'vd_mode': self.toggle_states['vd_mode'],
|
||||
}
|
||||
self.browser_controller.login_infos = {
|
||||
|
|
@ -763,6 +766,9 @@ class AutoPercentyGUI(QWidget):
|
|||
'is_admin': self.admin_toggle.isChecked(),
|
||||
}
|
||||
|
||||
# 로그인 정보 저장
|
||||
self.save_settings()
|
||||
|
||||
# 스레드 시작
|
||||
self.browser_controller.start_browser_task()
|
||||
else:
|
||||
|
|
@ -772,9 +778,10 @@ class AutoPercentyGUI(QWidget):
|
|||
def on_browser_started(self):
|
||||
"""브라우저 시작 완료 시 처리할 로직"""
|
||||
self.logger.debug("브라우저가 성공적으로 시작되었습니다.")
|
||||
# 버튼 상태 활성화
|
||||
# 버튼 상태 활성화&비활성화
|
||||
self.PercentyJob_button.setEnabled(True)
|
||||
self.pause_button.setEnabled(True)
|
||||
self.start_chrome_button.setEnabled(False)
|
||||
|
||||
@Slot(str)
|
||||
def on_browser_error(self, error_message):
|
||||
|
|
|
|||
971
gui2.py
971
gui2.py
|
|
@ -1,971 +0,0 @@
|
|||
from PySide6.QtWidgets import QInputDialog, QWidget, QSpinBox, QPushButton, QVBoxLayout, QGridLayout, QTextEdit, QLabel, QLineEdit, QHBoxLayout, QProgressBar, QSizePolicy
|
||||
from PySide6.QtCore import Qt, Slot, QRect, QSettings, QTimer
|
||||
from toggleSwitch import ToggleSwitch
|
||||
from browser_control import BrowserController
|
||||
from whale_translator import WhaleTranslator
|
||||
from clipboardImageManager import ClipboardImageManager
|
||||
from vertexAI import VertexAITranslator
|
||||
from option import OptionHandler
|
||||
from price import PriceHandler
|
||||
from title import TitleHandler
|
||||
from locatorManager import LocatorManager
|
||||
from src.cmb_diag import CMBSettingsDialog
|
||||
from src.DatabaseManager import DatabaseManager
|
||||
from logger_module import QTextEditLogger # 추가
|
||||
import logging
|
||||
import asyncio, sys
|
||||
import os, shutil, time
|
||||
|
||||
class AutoPercentyGUI(QWidget):
|
||||
def __init__(self, logger=None, app=None):
    """Build the UI, create the automation helpers, and restore saved settings.

    Args:
        logger: Logger shared with every helper object.
        app: Application object, stored as ``self.app``.
    """
    super().__init__()
    self.initUI()
    self.app = app
    self.logger = logger
    self.debug = False

    # Login details; filled in later.
    self.login_infos = {
        'admin_id': None,
        'admin_pw': None,
        'user_id': None,
        'user_pw': None,
        'is_admin': False,
    }

    # State of every toggle plus the related option values.
    self.toggle_states = {
        'title': False,
        'optionTrnas': False,
        'optionIMGTrans': False,
        'optionAutoSelect': False,
        'price': False,
        'thumb': False,
        'tag': False,
        'detail_Option': False,
        'detail_IMGTrans': False,
        'debug_mode': False,
        'recovery_mode': False,
        'vd_mode': False,
        'ed_mode': False,  # editing products that are already registered
        'watermark': False,  # watermark toggle
        'watermark_text': "WaterMark",  # watermark text
        'opacity_percent': 25,  # watermark opacity
        'max_option_count': 20,  # maximum number of selectable options
    }

    self.settings = QSettings("WhenRideMycar", "TranslationApp")
    locators = self.locator_manager = LocatorManager()
    controller = self.browser_controller = BrowserController(
        self, self.logger, locators, self.login_infos, self.toggle_states
    )
    self.vertexAI = VertexAITranslator(self.logger)
    self.optionHandler = self.whale_translator = None

    # Database file locations.
    self.base_dir = self.get_base_dir()
    self.user_db_path = os.path.join(self.base_dir, "userDB.db")
    self.initial_db_path = os.path.join(self.base_dir, "src", "initialDB.db")

    # Create userDB.db from initialDB.db when it does not exist yet.
    self.create_user_db_if_not_exists()

    self.db_manager = DatabaseManager(db_url=f"sqlite:///{self.user_db_path}", logger=self.logger)
    self.cmb_diag = CMBSettingsDialog(
        parent=self,
        logger=self.logger,
        db_manager=self.db_manager,
        initial_db_path=self.initial_db_path,
        user_db_path=self.user_db_path,
        debug=self.debug,
    )

    self.clipboardImageManager = ClipboardImageManager(
        self, logger, controller, watermark_font_size=36, debug=self.debug
    )
    self.optionHandler = OptionHandler(
        locators, controller, self.whale_translator, self.clipboardImageManager,
        self.logger, self.vertexAI, self.debug
    )
    self.priceHandler = PriceHandler(
        locators, controller, self.logger, self.optionHandler,
        self.vertexAI, self.cmb_diag, self.debug
    )
    self.titleHandler = TitleHandler(locators, controller, self.logger)
    self.running = False

    # Timing and progress counters.
    self.start_time = self.finish_time = 0
    self.total_product_count = self.current_product_count = 0
    self.title_count = self.option_count = self.price_count = 0
    self.detail_image_count = self.thumb_image_count = 0
    self.current_options_info = {}

    # Index of the stage currently in progress.
    self.current_stage_index = 0

    # Restore previously saved settings.
    self.load_settings()

    # Route log records to the on-screen log widget.
    self.add_text_edit_logger()

    # Reset the progress bar.
    self.update_total_progress(0, 0)
|
||||
|
||||
def get_base_dir(self):
    """Return the directory the application runs from.

    For a frozen build (``sys.frozen`` is set) this is the directory of the
    executable; otherwise the directory containing this source file.
    """
    is_packaged = getattr(sys, 'frozen', False)
    anchor_path = sys.executable if is_packaged else os.path.abspath(__file__)
    return os.path.dirname(anchor_path)
|
||||
|
||||
def create_user_db_if_not_exists(self):
    """Create userDB.db by copying initialDB.db when it does not exist yet.

    Raises:
        FileNotFoundError: If the user DB is missing and the initial DB
            file does not exist either.
        Exception: Any other error raised while copying is logged and re-raised.
    """
    try:
        # Nothing to do when the user database is already in place.
        if os.path.exists(self.user_db_path):
            return

        self.logger.debug("userDB.db 파일이 존재하지 않아 initialDB.db를 복사합니다.")
        if not os.path.exists(self.initial_db_path):
            raise FileNotFoundError(f"{self.initial_db_path} 파일이 없습니다. 초기 DB 파일이 존재하는지 확인해주세요.")

        shutil.copyfile(self.initial_db_path, self.user_db_path)
        self.logger.debug("initialDB.db를 userDB.db로 복사했습니다.")
    except FileNotFoundError as e:
        self.logger.error(f"DB 초기화 실패: {e}", exc_info=True)
        raise e
    except Exception as e:
        self.logger.error(f"DB 파일 복사 중 오류 발생: {e}", exc_info=True)
        raise e
|
||||
|
||||
def add_text_edit_logger(self):
    """Attach a handler that mirrors log records into the on-screen log widget."""
    widget_handler = QTextEditLogger()
    record_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    widget_handler.setFormatter(record_format)

    # The handler's appendHtml signal feeds the widget's append().
    widget_handler.appendHtml.connect(self.log.append)

    def _scroll_log_to_end():
        # Move the vertical scroll bar to its maximum.
        self.log.verticalScrollBar().setValue(self.log.verticalScrollBar().maximum())

    widget_handler.scrollToBottom.connect(_scroll_log_to_end)
    self.logger.addHandler(widget_handler)
    self.logger.debug('로그기록이 설정되었습니다.')
|
||||
|
||||
def start_stage(self, stage_index):
    """Start the blink effect for the given stage label.

    Does nothing when ``stage_index`` is outside ``self.stage_labels``.

    Args:
        stage_index: Index of the stage label to blink.
    """
    if not 0 <= stage_index < len(self.stage_labels):
        return

    # Stop a timer left over from an earlier stage; replacing the reference
    # alone would leave the old timer firing blink_stage for its old index.
    previous_timer = getattr(self, "timer", None)
    if previous_timer is not None:
        previous_timer.stop()

    self.timer = QTimer(self)
    self.blink_status = True
    self.timer.timeout.connect(lambda: self.blink_stage(stage_index))
    self.timer.start(500)  # toggle every 0.5 seconds
|
||||
|
||||
def blink_stage(self, stage_index):
    """Toggle the stage label between yellow and light gray on each call.

    Args:
        stage_index: Index into ``self.stage_labels``.
    """
    stage_label = self.stage_labels[stage_index]
    highlighted = self.blink_status
    stage_label.setStyleSheet(
        "background-color: yellow; padding: 5px;"
        if highlighted
        else "background-color: lightgray; padding: 5px;"
    )
    # Flip the flag so the next tick shows the other colour.
    self.blink_status = not highlighted
|
||||
|
||||
def stop_blinking_effect(self):
    """Stop the blink timer, if one has been created."""
    # No timer exists until start_stage has run for a valid index; without
    # this guard, stopping earlier raised AttributeError.
    timer = getattr(self, "timer", None)
    if timer is not None:
        timer.stop()
|
||||
|
||||
def complete_stage(self, stage_index):
    """Mark a stage as done and start blinking the next one.

    Does nothing when ``stage_index`` is outside ``self.stage_labels``.

    Args:
        stage_index: Index of the stage that just finished.
    """
    if stage_index < 0 or stage_index >= len(self.stage_labels):
        return

    self.stop_blinking_effect()
    self.stage_labels[stage_index].setStyleSheet("background-color: green; padding: 5px;")
    self.current_stage_index += 1

    # Start the blink effect for the following stage, if there is one.
    if self.current_stage_index < len(self.stages):
        self.start_stage(self.current_stage_index)
|
||||
|
||||
def initUI(self):
    """Create every widget of the main window, lay them out and connect signals."""
    # --- Window ---
    self.setWindowFlags(Qt.WindowStaysOnTopHint)
    self.setGeometry(QRect(500, 600, 380, 700))
    self.setWindowTitle('AutoPecenty2')

    # --- Log view (read-only) ---
    self.log = QTextEdit(self)
    self.log.setReadOnly(True)

    # --- Overall progress bar ---
    self.total_progress_bar = QProgressBar(self)
    self.total_progress_bar.setValue(0)
    self.total_progress_bar.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)

    # --- Stage timeline: one gray label per stage ---
    self.stageTimeline_layout = QHBoxLayout()
    self.stages = ["옵션", "상페"]
    self.stage_labels = []
    for stage_name in self.stages:
        stage_label = QLabel(stage_name)
        stage_label.setStyleSheet("background-color: lightgray; padding: 5px;")
        self.stage_labels.append(stage_label)
        self.stageTimeline_layout.addWidget(stage_label)

    # --- Per-step progress bar, hidden until a step is running ---
    self.detail_progress_bar = QProgressBar(self)
    self.detail_progress_bar.setValue(0)
    self.detail_progress_bar.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
    self.detail_progress_bar.setVisible(False)

    # --- Behaviour toggles, laid out on a grid ---
    self.toggle_layout = QGridLayout()

    def _make_toggle_handler(state_key):
        # Bind the key per toggle so each switch reports its own state key.
        return lambda checked: self.on_toggle_clicked_generic(state_key, checked)

    # (attribute prefix, toggle_states key, caption, grid row, grid column)
    toggle_specs = (
        ('title', 'title', "상품명 수정", 0, 0),
        ('optionTrnas', 'optionTrnas', "옵션명 AI번역", 0, 2),
        ('optionIMGTrans', 'optionIMGTrans', "옵션이미지 번역", 1, 0),
        ('optionAutoSelect', 'optionAutoSelect', "옵션 Auto선택", 1, 2),
        ('price', 'price', "가격 수정", 2, 0),
        ('thumb', 'thumb', "썸네일 AI수정", 2, 2),
        ('tag', 'tag', "태그 수정", 3, 0),
        ('detail_Option', 'detail_Option', "상세 옵션명 삽입", 3, 2),
        ('detail_IMGTrans', 'detail_IMGTrans', "상세 이미지 번역", 4, 0),
        ('debug', 'debug_mode', "디버그 모드", 4, 2),
        ('ed_mode', 'ed_mode', "수정등록 모드", 5, 0),
        ('vd_mode', 'vd_mode', "VD 모드", 5, 2),
        ('recovery_mode', 'recovery_mode', "복구 모드", 6, 2),
        ('watermark', 'watermark', "워터마크", 6, 0),
    )
    for attr_prefix, state_key, caption_text, row, column in toggle_specs:
        caption = QLabel(caption_text, self)
        switch = ToggleSwitch(self)
        switch.clicked.connect(_make_toggle_handler(state_key))
        self.toggle_layout.addWidget(caption, row, column)
        self.toggle_layout.addWidget(switch, row, column + 1)
        setattr(self, f"{attr_prefix}_toggle_label", caption)
        setattr(self, f"{attr_prefix}_toggle", switch)

    # The VD-mode switch starts hidden.
    self.vd_mode_toggle.setVisible(False)
    self.vd_mode_toggle_label.setVisible(False)

    # --- Watermark company-name row ---
    self.watermark_text_label = QLabel("회사 이름", self)
    self.watermark_text_input = QLineEdit(self)
    self.watermark_text_input.returnPressed.connect(self.update_watermark_text)
    self.watermark_confirm_button = QPushButton("확인", self)
    # The confirm button stores the text just like pressing Enter does.
    self.watermark_confirm_button.clicked.connect(self.update_watermark_text)

    # --- Maximum option count (0..100, initial value 20) ---
    self.max_option_count_label = QLabel("최대옵션수", self)
    self.max_option_count_input = QSpinBox(self)
    self.max_option_count_input.setMinimum(0)
    self.max_option_count_input.setMaximum(100)
    self.max_option_count_input.setValue(20)
    self.max_option_count_input.setToolTip("0으로 설정시 최대")
    self.max_option_count_input.valueChanged.connect(self.update_max_option_count)

    # Watermark widgets share one row with stretch factors 2:3:1.
    watermark_layout = QHBoxLayout()
    watermark_layout.addWidget(self.watermark_text_label, 2)
    watermark_layout.addWidget(self.watermark_text_input, 3)
    watermark_layout.addWidget(self.watermark_confirm_button, 1)

    self.toggle_layout.addLayout(watermark_layout, 7, 0, 1, 4)
    self.toggle_layout.addWidget(self.max_option_count_label, 8, 0)
    self.toggle_layout.addWidget(self.max_option_count_input, 8, 1)

    # The watermark input row starts hidden.
    self.toggle_visibility(False, [
        (self.watermark_text_input, self.watermark_text_label),
        (self.watermark_confirm_button, None),
    ])

    # --- Admin switch and credential fields ---
    self.admin_toggle = ToggleSwitch(self)
    self.admin_toggle.clicked.connect(self.on_admin_toggle_clicked)

    self.admin_id_label = QLabel("관리자 ID:", self)
    self.admin_id_input = QLineEdit(self)

    self.admin_pw_label = QLabel("관리자 PW:", self)
    self.admin_pw_input = QLineEdit(self)
    self.admin_pw_input.setEchoMode(QLineEdit.Password)

    self.user_id_label = QLabel("직원 ID:", self)
    self.user_id_input = QLineEdit(self)
    self.user_pw_label = QLabel("직원 PW:", self)
    self.user_pw_input = QLineEdit(self)
    self.user_pw_input.setEchoMode(QLineEdit.Password)

    # --- Action buttons ---
    disabled_style = """
        QPushButton:disabled {
            color: gray;
            background-color: lightgray;
            border: 1px solid gray;
        }
    """
    self.start_chrome_button = QPushButton('크롬 실행', self)
    self.translate_button = QPushButton('번역 시작', self)
    self.translate_button.setEnabled(False)
    self.translate_button.setStyleSheet(disabled_style)
    self.pause_button = QPushButton('일시정지', self)
    self.pause_button.setEnabled(False)
    self.pause_button.setStyleSheet(disabled_style)
    self.cmb_button = QPushButton('크무비설정', self)
    self.cmb_test_button = QPushButton('크무비테스트', self)

    # Make the four main buttons 1.5x their natural height.
    button_height = int(self.start_chrome_button.sizeHint().height() * 1.5)
    for tall_button in (
        self.start_chrome_button,
        self.translate_button,
        self.pause_button,
        self.cmb_button,
    ):
        tall_button.setFixedHeight(button_height)

    # --- Layout assembly ---
    self.main_layout = QVBoxLayout()

    self.admin_toggle_layout = QHBoxLayout()
    self.admin_toggle_layout.addWidget(QLabel("관리자 여부:", self))
    self.admin_toggle_layout.addWidget(self.admin_toggle)
    self.main_layout.addLayout(self.admin_toggle_layout, 1)

    # The admin ID sits directly in the main layout.
    self.main_layout.addWidget(self.admin_id_label)
    self.main_layout.addWidget(self.admin_id_input)

    self.admin_layout = QVBoxLayout()
    self.admin_layout.addWidget(self.admin_pw_label)
    self.admin_layout.addWidget(self.admin_pw_input)

    self.user_layout = QVBoxLayout()
    for user_widget in (
        self.user_id_label,
        self.user_id_input,
        self.user_pw_label,
        self.user_pw_input,
    ):
        self.user_layout.addWidget(user_widget)

    self.main_layout.addLayout(self.admin_layout, 3)
    self.main_layout.addLayout(self.user_layout, 3)

    self.button_layout = QHBoxLayout()
    for action_button in (
        self.start_chrome_button,
        self.translate_button,
        self.pause_button,
        self.cmb_button,
        self.cmb_test_button,
    ):
        self.button_layout.addWidget(action_button)

    self.log_layout = QVBoxLayout()
    self.log_layout.addWidget(self.log)
    self.log_layout.addWidget(self.total_progress_bar)
    self.log_layout.addLayout(self.stageTimeline_layout)
    self.log_layout.addWidget(self.detail_progress_bar)

    self.main_layout.addLayout(self.toggle_layout, 2)
    self.main_layout.addLayout(self.button_layout, 2)
    self.main_layout.addLayout(self.log_layout, 5)

    self.setLayout(self.main_layout)

    # Start in staff (non-admin) mode.
    self.on_admin_toggle_clicked(False)

    # --- Button signal wiring ---
    self.start_chrome_button.clicked.connect(self.start_browser)
    self.translate_button.clicked.connect(self.start_translation)
    self.pause_button.clicked.connect(self.pause_translation)
    self.cmb_button.clicked.connect(self.on_cmb_button_clicked)
    self.cmb_test_button.clicked.connect(self.on_cmb_test_button_clicked)
|
||||
|
||||
def load_toggle_settings(self):
    """Restore the boolean toggle states from QSettings and sync the switches.

    ``toggle_states`` also carries non-boolean values written by
    ``update_watermark_text`` ('watermark_text') and
    ``update_max_option_count`` ('max_option_count'). Reading those with
    ``type=bool`` would replace them with a boolean, so they are skipped.
    """
    non_toggle_keys = {'watermark_text', 'max_option_count'}
    # Iterate over a snapshot of the keys so the dict can be assigned to safely.
    for key in list(self.toggle_states):
        if key in non_toggle_keys:
            continue
        self.toggle_states[key] = self.settings.value(f"toggle/{key}", False, type=bool)
        self.update_toggle_ui(key)
|
||||
|
||||
def save_toggle_settings(self):
    """Persist every entry of ``toggle_states`` under the ``toggle/`` prefix."""
    states = self.toggle_states
    for name in states:
        self.settings.setValue(f"toggle/{name}", states[name])
|
||||
|
||||
def update_toggle_ui(self, key):
    """Push the stored state for *key* into its toggle widget.

    Widgets are normally named ``<key>_toggle``. The debug switch is the
    exception: its state key is 'debug_mode' but the widget attribute is
    ``debug_toggle``, so an alias is tried when the regular name is missing.
    Keys without any matching widget are ignored.
    """
    widget_aliases = {'debug_mode': 'debug_toggle'}

    toggle_widget = getattr(self, f"{key}_toggle", None)
    if toggle_widget is None and key in widget_aliases:
        toggle_widget = getattr(self, widget_aliases[key], None)

    if toggle_widget is not None:
        toggle_widget.setChecked(self.toggle_states[key])

    # The watermark toggle also controls the company-name input row.
    if key == 'watermark':
        self.on_watermark_toggle_clicked(self.toggle_states[key])
|
||||
|
||||
def update_watermark_visibility(self):
    """Show the watermark toggle only while an image-translation toggle is on.

    When the toggle is shown, the company-name row follows the watermark
    toggle's checked state; when it is hidden, the row is hidden too.
    """
    watermark_switch = [(self.watermark_toggle, self.watermark_toggle_label)]
    watermark_row = [
        (self.watermark_text_input, self.watermark_text_label),
        (self.watermark_confirm_button, None),
    ]

    states = self.toggle_states
    if not (states['optionIMGTrans'] or states['detail_IMGTrans'] or states['thumb']):
        # No image translation at all: hide the switch and its row.
        self.toggle_visibility(False, watermark_switch)
        self.toggle_visibility(False, watermark_row)
        return

    self.toggle_visibility(True, watermark_switch)
    row_visible = True if self.watermark_toggle.isChecked() else False
    self.toggle_visibility(row_visible, watermark_row)
|
||||
|
||||
def toggle_visibility(self, is_checked, toggle_items):
    """Apply one visibility flag to several (field, label) pairs.

    :param is_checked: visibility to apply (True/False)
    :param toggle_items: list of ``(field, label)`` tuples; ``label`` may be
        falsy (e.g. ``None``), in which case only the field is updated
    """
    for field, caption in toggle_items:
        widgets = (field, caption) if caption else (field,)
        for widget in widgets:
            widget.setVisible(is_checked)
|
||||
|
||||
def on_toggle_clicked_generic(self, key, is_checked):
    """Record a toggle click, log it, refresh dependent widgets and persist."""
    self.toggle_states[key] = is_checked
    status_text = "활성화" if is_checked else "비활성화"

    # State key -> attribute holding the caption label of that toggle.
    label_attrs = {
        'title': 'title_toggle_label',
        'optionTrnas': 'optionTrnas_toggle_label',
        'optionIMGTrans': 'optionIMGTrans_toggle_label',
        'optionAutoSelect': 'optionAutoSelect_toggle_label',
        'price': 'price_toggle_label',
        'thumb': 'thumb_toggle_label',
        'tag': 'tag_toggle_label',
        'detail_Option': 'detail_Option_toggle_label',
        'detail_IMGTrans': 'detail_IMGTrans_toggle_label',
        'debug_mode': 'debug_toggle_label',
        'vd_mode': 'vd_mode_toggle_label',
        'recovery_mode': 'recovery_mode_toggle_label',
        'ed_mode': 'ed_mode_toggle_label',
        'watermark': 'watermark_toggle_label',
    }
    label_text = getattr(self, label_attrs[key]).text() if key in label_attrs else ""

    self.logger.debug(f"{label_text} 버튼 - {status_text} 선택")

    # Image-translation toggles decide whether the watermark toggle is shown.
    if key in ('optionIMGTrans', 'detail_IMGTrans', 'thumb'):
        self.update_watermark_visibility()

    # The watermark toggle shows or hides the company-name row.
    if key == 'watermark':
        watermark_row = [
            (self.watermark_text_input, self.watermark_text_label),
            (self.watermark_confirm_button, None),
        ]
        self.toggle_visibility(is_checked, watermark_row)

    self.save_toggle_settings()
|
||||
|
||||
def on_watermark_toggle_clicked(self, is_checked):
    """Show or hide the company-name label, input and confirm button.

    When shown, the current input text is also copied into
    ``toggle_states['watermark_text']``.
    """
    visible = True if is_checked else False
    for widget in (
        self.watermark_text_label,
        self.watermark_text_input,
        self.watermark_confirm_button,
    ):
        widget.setVisible(visible)

    if visible:
        self.toggle_states['watermark_text'] = self.watermark_text_input.text()
|
||||
|
||||
def update_watermark_text(self):
    """Copy the company-name input into ``toggle_states['watermark_text']`` and log it."""
    entered_text = self.watermark_text_input.text()
    self.toggle_states['watermark_text'] = entered_text
    self.logger.debug(f"Updated watermark text: {entered_text}")
|
||||
|
||||
def update_max_option_count(self):
    """Copy the spin-box value into ``toggle_states['max_option_count']`` and log it."""
    selected_count = self.max_option_count_input.value()
    self.toggle_states['max_option_count'] = selected_count
    self.logger.debug(f"최대 선택 가능 옵션 수 업데이트: {selected_count}")
|
||||
|
||||
def on_admin_toggle_clicked(self, is_checked):
    """Switch between the admin password fields and the staff ID/PW fields."""
    admin_mode = True if is_checked else False
    # Exactly one of the two groups is visible at a time.
    self.set_layout_visibility(self.admin_layout, admin_mode)
    self.set_layout_visibility(self.user_layout, not admin_mode)
|
||||
|
||||
def on_vd_mode_for_detail_imageTrans_clicked(self, is_checked):
    """Show the VD-mode switch and its label when *is_checked* is true, hide them otherwise."""
    visible = True if is_checked else False
    for widget in (self.vd_mode_toggle, self.vd_mode_toggle_label):
        widget.setVisible(visible)
|
||||
|
||||
|
||||
def set_layout_visibility(self, changelayout, visible):
    """Set the visibility of every widget held directly by *changelayout*.

    Layout items that do not wrap a widget are left untouched.
    """
    position = 0
    while position < changelayout.count():
        child = changelayout.itemAt(position).widget()
        if child:
            child.setVisible(visible)
        position += 1
|
||||
|
||||
def on_cmb_test_button_clicked(self, test_cat):
    """Ask for a category string and log the stage the settings dialog returns for it.

    *test_cat* is not used by this method.
    """
    self.logger.debug('크무비 테스트 버튼 클릭됨')

    text, ok = QInputDialog.getText(self, "카테고리 입력 테스트", "카테고리를 형식에 맞게 입력하세요:")
    # Nothing to look up when the dialog was cancelled or left empty.
    if not (ok and text):
        return

    stage = self.cmb_diag.get_crmobi_stage(text)
    self.logger.debug(f"{stage}")
|
||||
|
||||
def on_cmb_button_clicked(self):
    """Log the click and show the settings dialog held in ``self.cmb_diag``."""
    self.logger.debug('크무비 설정 버튼 클릭됨')
    settings_dialog = self.cmb_diag
    settings_dialog.show()
|
||||
|
||||
@Slot()
def start_browser(self):
    """Start the browser controller (and Whale when image translation is on).

    Also collects the login fields, saves the settings, passes the page
    object to the handlers and enables the translate/pause buttons.
    """
    self.logger.debug('크롬 브라우저를 실행합니다...')

    states = self.toggle_states
    optionIMGTrans_status = states['optionIMGTrans']
    detail_IMGTrans_status = states['detail_IMGTrans']
    vd_mode_status = states['vd_mode']

    # The Whale translator is only created when an image-translation toggle is on.
    if optionIMGTrans_status or detail_IMGTrans_status:
        self.logger.debug(f"optionIMGTrans_status : {optionIMGTrans_status}, detail_IMGTrans_status : {detail_IMGTrans_status}")
        self.whale_translator = WhaleTranslator(self.app, self.logger, secret_mode=True, vd_mode=vd_mode_status)
        self.whale_translator.start_whale_browser()

    # Snapshot the credentials currently typed into the form.
    credential_sources = (
        ('admin_id', self.admin_id_input.text),
        ('admin_pw', self.admin_pw_input.text),
        ('user_id', self.user_id_input.text),
        ('user_pw', self.user_pw_input.text),
        ('is_admin', self.admin_toggle.isChecked),
    )
    for field_name, read_value in credential_sources:
        self.login_infos[field_name] = read_value()

    self.browser_controller.whale_browser = self.whale_translator
    self.browser_controller.start()

    # Persist the login information.
    self.save_settings()

    # NOTE(review): `page` is read immediately after start(); confirm the
    # controller has already assigned it by this point.
    self.optionHandler.update_page(self.browser_controller.page)
    self.optionHandler.update_whale(self.whale_translator)
    self.titleHandler.update_page(self.browser_controller.page)
    self.priceHandler.update_page(self.browser_controller.page)

    for action_button in (self.translate_button, self.pause_button):
        action_button.setEnabled(True)
|
||||
|
||||
def save_settings(self):
    """Write the login fields, admin switch and watermark text to QSettings."""
    # NOTE(review): both passwords are stored as plain text via setValue().
    entries = (
        ("admin/id", self.admin_id_input.text),
        ("admin/pw", self.admin_pw_input.text),
        ("user/id", self.user_id_input.text),
        ("user/pw", self.user_pw_input.text),
        ("admin/toggle", self.admin_toggle.isChecked),
        ("watermark_text", self.watermark_text_input.text),
    )
    for settings_key, read_value in entries:
        self.settings.setValue(settings_key, read_value())

    # Keep toggle_states['watermark_text'] in step with the input field.
    self.update_watermark_text()
|
||||
|
||||
def load_settings(self):
    """Restore the login fields, admin switch, watermark text and toggles from QSettings."""
    self.admin_id_input.setText(self.settings.value("admin/id", ""))
    self.admin_pw_input.setText(self.settings.value("admin/pw", ""))
    self.user_id_input.setText(self.settings.value("user/id", ""))
    self.user_pw_input.setText(self.settings.value("user/pw", ""))

    # Ask QSettings for a bool, as load_toggle_settings does. Comparing the
    # raw value with the string "true" is False whenever a real bool comes
    # back, which silently resets the admin switch.
    admin_toggle_state = self.settings.value("admin/toggle", False, type=bool)
    self.admin_toggle.setChecked(admin_toggle_state)
    self.on_admin_toggle_clicked(admin_toggle_state)

    self.watermark_text_input.setText(self.settings.value("watermark_text", ""))
    self.update_watermark_text()

    self.load_toggle_settings()
|
||||
|
||||
def update_total_progress(self, current_value, total_value):
    """Show overall progress as a percentage with a "done/total" caption.

    :param current_value: number of finished products
    :param total_value: number of products to process

    The bar is forced to a 0..100 range because the value written is a
    percentage; with any other maximum (``start_translation`` sets it to the
    product count) the percentage would be out of range or mis-scaled.
    A non-positive *total_value* is shown as the waiting state instead of
    raising ZeroDivisionError.
    """
    progress_bar = self.total_progress_bar
    progress_bar.setRange(0, 100)

    if current_value == 0 or total_value <= 0:
        progress_bar.setValue(0)
        progress_bar.setFormat("상품 수정 대기")
        return

    percentage = int((current_value / total_value) * 100)
    progress_bar.setValue(percentage)
    progress_bar.setFormat(f"상품 {current_value}/{total_value}개 완료 [{percentage}%]")
|
||||
|
||||
def update_detail_progress(self, current_value, total_value):
    """Show per-step progress as a percentage with a "done/total" caption.

    :param current_value: number of finished items in the current step
    :param total_value: number of items in the current step

    A non-positive *total_value* is shown as the waiting state instead of
    raising ZeroDivisionError.
    """
    progress_bar = self.detail_progress_bar

    if current_value == 0 or total_value <= 0:
        progress_bar.setValue(0)
        progress_bar.setFormat("수정 대기")
        return

    percentage = int((current_value / total_value) * 100)
    progress_bar.setValue(percentage)
    progress_bar.setFormat(f"{current_value}/{total_value}개 완료 [{percentage}%]")
|
||||
|
||||
@Slot()
async def start_translation(self):
    """Walk every product page and run the enabled edit steps on each product.

    For each page the edit buttons (or, in edit mode, the product-name
    elements) are collected; each enabled product is opened, edited according
    to ``toggle_states`` and saved. The loop stops when all products are
    done, no further page exists, or ``self.running`` is cleared.

    ``self.running`` is always reset when the coroutine ends, including the
    early returns, and failures are logged at error level with a traceback.
    """
    self.logger.debug('번역 작업을 시작합니다...')
    self.running = True

    try:
        # Scroll to the bottom first so dynamically loaded rows are present.
        await self.browser_controller.scroll_page_to_bottom()

        # The result is a dict holding the total count and the page size.
        result = await self.browser_controller.get_total_product_count()
        total_products = result.get("total_count", 0)
        items_per_page = result.get("items_per_page", 0)
        self.logger.debug(f"총 상품 수: {total_products}, 페이지당 상품 수: {items_per_page}")

        if total_products == 0:
            self.logger.debug('수집할 상품이 없습니다. 작업을 종료합니다.')
            return

        # update_total_progress writes percentages, so the bar runs 0..100.
        self.total_progress_bar.setRange(0, 100)
        self.total_progress_bar.setValue(0)
        completed_count = 0
        self.update_total_progress(completed_count, total_products)

        page_number = 1
        # Filled per page in recovery mode; None means "not fetched yet".
        deleted_imgs = None

        while self.running and completed_count < total_products:
            self.logger.debug(f'현재 페이지: {page_number}')

            if not page_number == 1:
                await self.browser_controller.scroll_page_to_top()
                self.logger.debug(f'1페이지가 아니므로 동적로딩을 위해 휠 스크롤 업')

            if not self.toggle_states['ed_mode']:
                # Normal mode: collect the per-product edit buttons.
                self.logger.debug('수정모드가 아니므로 상품수정 버튼 elements를 수집합니다.')
                product_buttons = await self.browser_controller.get_product_edit_buttons_by_templete()
            else:
                # Edit mode: the product-name elements act as the buttons.
                self.logger.debug('상품정보 수집')
                product_infos, product_name_elements = await self.browser_controller.collect_product_info(items_per_page, ed_mode=self.toggle_states['ed_mode'])
                self.logger.debug(f"product_infos : {product_infos}")
                self.logger.debug('수정모드이므로 상품명 elements를 수정버튼으로 활용합니다.')
                product_buttons = product_name_elements

            self.logger.debug(f"product_buttons 갯수 : [{len(product_buttons)}]개")

            if not product_buttons:
                self.logger.debug('수정할 상품이 없습니다. 작업을 종료합니다.')
                break

            if self.toggle_states['recovery_mode']:
                deleted_imgs = self.browser_controller.deleted_img_urls_from_logs()

            for index, button in enumerate(product_buttons, start=1):
                if not self.running:
                    self.logger.debug('번역 작업이 중단되었습니다.')
                    return

                self.logger.debug(f'{index}/{len(product_buttons)} 버튼의 활성상태 확인 중...')

                is_disabled = await self.browser_controller.is_button_disabled(button)
                if is_disabled:
                    self.logger.debug(f'{index}/{len(product_buttons)}: 상품의 수정버튼이 비활성화되어 있어 작업을 건너뜁니다.')
                    continue

                self.logger.debug(f'{index}/{len(product_buttons)}: 세부사항 수정 작업 중...')

                await self.browser_controller.open_product_edit_dialog(button)

                # Collect the original product name and its category.
                self.start_stage(0)
                product_name = await self.titleHandler.get_original_product_name()
                product_category = await self.titleHandler.get_category(market='ss')
                self.complete_stage(0)

                if self.toggle_states['optionTrnas'] or self.toggle_states['optionIMGTrans'] or self.toggle_states['optionAutoSelect']:
                    self.logger.debug(f"옵션수정 : optionTrnas={self.toggle_states['optionTrnas']} + optionIMGTrans={self.toggle_states['optionIMGTrans']} + optionAutoSelect{self.toggle_states['optionAutoSelect']}")
                    self.start_stage(0)
                    await self.edit_option(product_name)
                    self.complete_stage(0)

                if self.toggle_states['price']:
                    self.logger.debug(f"가격수정 : {self.toggle_states['price']} ")
                    await self.edit_price(product_category)

                # Thumbnail, tag and title editing have no step here yet.
                if self.toggle_states['thumb']:
                    pass

                if self.toggle_states['tag']:
                    pass

                if self.toggle_states['title']:
                    pass

                if self.toggle_states['detail_Option'] or self.toggle_states['detail_IMGTrans']:
                    self.logger.debug(f"상세페이지 수정 : {self.toggle_states['detail_Option']} + {self.toggle_states['detail_IMGTrans']}")

                    self.start_stage(1)
                    if not self.toggle_states['recovery_mode']:
                        await self.detail_trans()
                    else:
                        # Recovery mode may have been switched on after this
                        # page's fetch was skipped; fetch the list on demand.
                        if deleted_imgs is None:
                            deleted_imgs = self.browser_controller.deleted_img_urls_from_logs()
                        await self.detail_trans_for_recovery(product_name, deleted_imgs)
                    self.complete_stage(1)

                self.logger.debug('상품 세부사항 저장 중...')
                await self.browser_controller.save_and_ecs_product_edit()

                completed_count += 1
                self.update_total_progress(completed_count, total_products)
                self.logger.debug(f'{completed_count}/[{total_products}]개 상품 수정 완료.')

                if completed_count >= total_products:
                    self.logger.debug('모든 상품이 완료되었습니다.')
                    return

            # Continue on the next page, if there is one.
            if not await self.browser_controller.go_to_next_page():
                self.logger.debug('더 이상 페이지가 없습니다. 작업을 종료합니다.')
                break
            page_number += 1

        if self.running:
            self.logger.debug('모든 상품 번역 및 저장 완료.')

    except Exception as e:
        # Top-level boundary of the job: report the failure with traceback.
        self.logger.error(f"번역 작업 중 오류 발생: {e}", exc_info=True)
    finally:
        # Every exit path leaves the job marked as stopped.
        self.running = False
|
||||
|
||||
def pause_translation(self):
    """Request the running translation job to stop by clearing ``self.running``."""
    log = self.logger
    log.debug('번역 작업을 중단합니다...')
    # The job loops poll this flag.
    self.running = False
|
||||
|
||||
async def close(self):
    """Save settings, shut the browser down and close the window.

    NOTE(review): this is a coroutine, so it only runs when something
    awaits it — confirm how the window's close request reaches it.
    """
    self.logger.debug('프로그램을 종료합니다...')
    self.save_settings()
    await self.browser_controller.close_browser()

    # start_browser only creates the Whale translator when an
    # image-translation toggle is on, so it may be missing or None here.
    whale_translator = getattr(self, 'whale_translator', None)
    if self.toggle_states['vd_mode'] and whale_translator is not None:
        whale_translator.close_all_virtual_desktops()

    super().close()
|
||||
|
||||
async def detail_trans(self):
    """Translate the images of the detail page one by one, then save.

    Progress is reported with a 1-based count, so the bar leaves the waiting
    state after the first image and reaches 100% after the last one.
    """
    await self.browser_controller.click_detail_tab()

    self.detail_progress_bar.setValue(0)
    self.detail_progress_bar.setVisible(True)

    # Collect the image URLs to translate.
    image_urls = await self.browser_controller.extract_image_urls(self.optionHandler, is_option_data=True)
    total_images = len(image_urls)
    self.logger.debug(f"현재 상품의 총 이미지 수 : {total_images}개")

    self.detail_image_count += total_images

    for current_image_count, url in enumerate(image_urls, start=1):
        if not self.running:
            self.logger.debug('번역 작업이 중단되었습니다.')
            break

        self.logger.debug(f"웨일 브라우저를 활용한 이미지 번역 프로세스")
        is_success_translated = self.whale_translator.translate_image(url)

        is_paste_success = self.browser_controller.paste_image_in_chrome(self.clipboardImageManager, url, is_success_translated, self.toggle_states)
        if is_paste_success:
            self.logger.debug(f"{url} gui 이미지 붙여넣기 성공")
        else:
            self.logger.debug(f"{url} gui 이미지 붙여넣기 실패")

        self.logger.debug(f"Progress Update")
        self.update_detail_progress(current_image_count, total_images)

    # Save the edited product.
    self.logger.debug('상품 세부사항 저장 중...')
    await self.browser_controller.save_product_edit()

    self.detail_progress_bar.setVisible(False)
    self.detail_progress_bar.setValue(0)
|
||||
|
||||
async def detail_trans_for_recovery(self, product_name, deleted_imgs):
    """Run the image recovery step for a product and save the product edit.

    Opens the detail tab, shows the progress bar, awaits the browser
    controller's ``recovery_image_urls(product_name, deleted_imgs)`` and then
    saves the product edit.

    :param product_name: forwarded unchanged to ``recovery_image_urls``.
    :param deleted_imgs: forwarded unchanged to ``recovery_image_urls``.

    The progress bar is hidden and reset in a ``finally`` clause so it does
    not stay on screen when recovery or saving raises; the exception still
    propagates.
    """
    # Open the detail-page tab.
    await self.browser_controller.click_detail_tab()

    self.detail_progress_bar.setValue(0)
    self.detail_progress_bar.setVisible(True)
    try:
        self.logger.debug('recovery_image_urls 메서드 호출')
        await self.browser_controller.recovery_image_urls(product_name, deleted_imgs)

        # Save after editing.
        self.logger.debug('상품 세부사항 저장 중...')
        await self.browser_controller.save_product_edit()
    finally:
        self.detail_progress_bar.setVisible(False)
        self.detail_progress_bar.setValue(0)
|
||||
|
||||
async def edit_option(self, product_name):
    """Process the product's options and save the product edit.

    Opens the option tab, shows the progress bar, awaits
    ``optionHandler.process_options`` with a fixed limit of 20 options and
    stores its result in ``self.current_options_info``, then saves the
    product edit.

    :param product_name: forwarded unchanged to ``process_options``.

    The progress bar is hidden in a ``finally`` clause so it does not stay on
    screen when option processing or saving raises; the exception still
    propagates.
    """
    # Open the option tab.
    await self.browser_controller.click_option_tab()

    self.detail_progress_bar.setVisible(True)
    try:
        # Maximum number of options passed to the option handler.
        max_option_count = 20
        self.current_options_info = await self.optionHandler.process_options(
            product_name, max_option_count, self.toggle_states
        )

        # Save after editing.
        await self.browser_controller.save_product_edit()
    finally:
        self.detail_progress_bar.setVisible(False)
|
||||
|
||||
|
||||
async def edit_price(self, product_category):
    """Run the price-editing step for a product and save the product edit.

    Opens the price tab, shows the progress bar, awaits
    ``priceHandler.process_price(category=product_category)`` and then saves
    the product edit.

    :param product_category: passed to ``process_price`` as ``category``.

    The progress bar is hidden in a ``finally`` clause so it does not stay on
    screen when price processing or saving raises; the exception still
    propagates.
    """
    # Open the price tab.
    await self.browser_controller.click_price_tab()

    self.detail_progress_bar.setVisible(True)
    try:
        # Price editing process.
        await self.priceHandler.process_price(category=product_category)

        # Save after editing.
        await self.browser_controller.save_product_edit()
    finally:
        self.detail_progress_bar.setVisible(False)
|
||||
|
||||
38
main2.py
38
main2.py
|
|
@ -1,38 +0,0 @@
|
|||
import ctypes
|
||||
from PySide6.QtWidgets import QApplication
|
||||
from gui2 import AutoPercentyGUI
|
||||
from logger_module import setup_logger
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Flag values used to prevent sleep mode.
ES_CONTINUOUS = 0x80000000
ES_SYSTEM_REQUIRED = 0x00000001


def prevent_sleep():
    """Change the system setting so that sleep mode is prevented."""
    kernel32 = ctypes.windll.kernel32
    keep_awake_flags = ES_CONTINUOUS | ES_SYSTEM_REQUIRED
    kernel32.SetThreadExecutionState(keep_awake_flags)
|
||||
|
||||
def allow_sleep():
    """Undo the sleep-prevention setting."""
    kernel32 = ctypes.windll.kernel32
    # Passing ES_CONTINUOUS alone, without ES_SYSTEM_REQUIRED.
    kernel32.SetThreadExecutionState(ES_CONTINUOUS)
|
||||
|
||||
if __name__ == '__main__':
    # Configure logging.
    logger = setup_logger('default_logger', 'appTranslator.log')

    # Enable sleep prevention for the lifetime of the application.
    prevent_sleep()
    try:
        # Start the PySide6 GUI.
        app = QApplication([])
        window = AutoPercentyGUI(logger)
        window.show()

        try:
            app.exec()
        finally:
            # Call the window's close method when the program ends.
            window.close()
    finally:
        # Re-allow sleep even if GUI start-up or window.close() raised.
        allow_sleep()
|
||||
|
|
@ -48,9 +48,10 @@ class OptionHandler:
|
|||
def update_page(self, page1):
    """Replace the stored page object with *page1* and log the update."""
    self.page = page1
    message = f"page객체 업데이트 : {page1}"
    self.logger.debug(message)
|
||||
def update_whale(self, whale1=None):
    """Refresh ``self.whale_translator`` and log the new value.

    The method used to be defined twice under the same name: once taking the
    translator as an argument and once fetching it from the browser
    controller. The second definition shadowed the first. Both call styles
    are now served by this single definition.

    :param whale1: translator object to store. When omitted (or ``None``),
        the translator is fetched with ``self.browser_controller.get_whale()``.
    """
    if whale1 is None:
        whale1 = self.browser_controller.get_whale()
    self.whale_translator = whale1
    self.logger.debug(f"whale_translator 업데이트 : {self.whale_translator}")
|
||||
|
||||
def init_option_info(self):
|
||||
self.option_info = {
|
||||
|
|
@ -168,6 +169,8 @@ class OptionHandler:
|
|||
:param max_option_count: 최대 옵션 갯수 (기본값 20).
|
||||
"""
|
||||
try:
|
||||
self.update_whale()
|
||||
|
||||
self.logger.debug(f"상품명: {product_name}에 대한 옵션을 처리 중...")
|
||||
|
||||
self.logger.debug(f"이전 상품의 옵션정보를 초기화합니다.")
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 1.5 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 1.8 KiB |
|
|
@ -0,0 +1,51 @@
|
|||
import os
|
||||
from PIL import Image, ImageDraw
|
||||
import pytesseract
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
# Directory containing this file; the bundled Tesseract paths are built from it.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Tesseract executable and tessdata folder, both under src/Tesseract-OCR.
tesseract_path, tessdata_path = (
    os.path.join(current_dir, 'src', 'Tesseract-OCR', leaf)
    for leaf in ('tesseract.exe', 'tessdata')
)

# Point pytesseract at the executable and export the data folder location.
os.environ['TESSDATA_PREFIX'] = tessdata_path
pytesseract.pytesseract.tesseract_cmd = tesseract_path

# Tesseract options; no tessdata-dir option is passed here.
# Only the chi_sim language is selected in this configuration.
tessdata_dir_config = '--oem 1 -l chi_sim'
|
||||
|
||||
def show_ocr_results_with_boxes(pil_img, conf_threshold=40):
    """Run OCR on *pil_img*, draw a box around each confident result, show and return it.

    The image is passed to ``pytesseract.image_to_data`` with the module's
    ``tessdata_dir_config``. For every entry whose confidence is above
    ``conf_threshold`` a red rectangle is drawn on a copy of the image and the
    recognised text plus its confidence is written in blue just above the box.
    The number of drawn entries is printed and the annotated copy is displayed.

    :param pil_img: PIL image to analyse; it is not modified.
    :param conf_threshold: entries with a confidence at or below this value
        are skipped.
    :return: the annotated copy of the image (previously nothing was
        returned, although the example caller uses the result).
    """
    # Recognise text and fetch the bounding boxes.
    data = pytesseract.image_to_data(pil_img, config=tessdata_dir_config, output_type=pytesseract.Output.DICT)

    # Draw on a copy so the caller's image stays untouched.
    img_with_boxes = pil_img.copy()
    draw = ImageDraw.Draw(img_with_boxes)

    recognized_text_count = 0

    columns = zip(data['text'], data['conf'], data['left'], data['top'], data['width'], data['height'])
    for recognized_text, conf, x, y, w, h in columns:
        # The confidence may arrive as an int, a float or a numeric string
        # such as '95.5'; int('95.5') would raise, so go through float first.
        try:
            confidence = int(float(conf))
        except (TypeError, ValueError):
            continue  # not a number: cannot be compared, skip the entry
        if confidence <= conf_threshold:  # OCR confidence filter
            continue

        # Draw a box around the recognised region, labelled with text and confidence.
        draw.rectangle([(x, y), (x + w, y + h)], outline="red", width=2)
        draw.text((x, y - 10), f"{recognized_text} ({conf})", fill="blue")

        recognized_text_count += 1

    # Report the result.
    print(f"인식된 텍스트 개수: {recognized_text_count}")
    img_with_boxes.show()  # display the annotated image
    return img_with_boxes
|
||||
|
||||
# Example usage.
if __name__ == "__main__":
    input_image = Image.open("1.jpg")  # load the image
    # show_ocr_results_with_boxes() displays the annotated image itself.
    # The previous extra `.show()` on its result was called on None and
    # raised AttributeError, so it is dropped.
    show_ocr_results_with_boxes(input_image)
|
||||
BIN
test/output.png
BIN
test/output.png
Binary file not shown.
|
Before Width: | Height: | Size: 349 KiB |
|
|
@ -1 +0,0 @@
|
|||
{"detail":[{"type":"missing","loc":["body","file"],"msg":"Field required","input":null}]}
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 464 B |
Binary file not shown.
|
Before Width: | Height: | Size: 327 B |
|
|
@ -1,790 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>AMBIGUOUS_WORDS(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
||||
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
AMBIGUOUS_WORDS(1) Manual Page
|
||||
</h1>
|
||||
<h2>NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<p>ambiguous_words -
|
||||
generate sets of words Tesseract is likely to find ambiguous
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>ambiguous_words</strong> [-l lang] <em>TESSDATADIR</em> <em>WORDLIST</em> <em>AMBIGUOUSFILE</em></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>ambiguous_words(1) runs Tesseract in a special mode, and for each word
|
||||
in word list, produces a set of words which Tesseract thinks might be
|
||||
ambiguous with it. <em>TESSDATADIR</em> must be set to the absolute path of
|
||||
a directory containing <em>tessdata/lang.traineddata</em>.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,857 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>CLASSIFIER_TESTER(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
||||
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="article">
|
||||
<div id="header">
|
||||
<h1>CLASSIFIER_TESTER(1)</h1>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_name">NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>classifier_tester - for <strong>legacy tesseract</strong> engine.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>classifier_tester</strong> -U <em>unicharset_file</em> -F <em>font_properties_file</em> -X <em>xheights_file</em> -classifier <em>x</em> -lang <em>lang</em> [-output_trainer trainer] *.tr</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>classifier_tester(1) runs Tesseract in a special mode.
|
||||
It takes a list of .tr files and tests a character classifier
|
||||
on data as formatted for training,
|
||||
but it doesn’t have to be the same as the training data.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_in_out_arguments">IN/OUT ARGUMENTS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>a list of .tr files</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
-l <em>lang</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) three character language code; default value <em>eng</em>.
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-classifier <em>x</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) One of "pruner", "full".
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-U <em>unicharset</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) The unicharset for the language.
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-F <em>font_properties_file</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1:
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content monospaced">
|
||||
<pre>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-X <em>xheights_file</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content monospaced">
|
||||
<pre>*font_name* *xheight*</pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-output_trainer <em>trainer</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Output, Optional) Filename for output trainer.
|
||||
</p>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,803 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>CNTRAINING(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
// Build (or rebuild) the footnote list inside the element with id "footnotes"
// from every <span class="footnote"> found under the element with id "content",
// then rewrite <span class="footnoteref"> elements to point at the numbered notes.
// Takes no arguments and returns nothing; it only mutates the DOM.
// Does nothing at all when the "footnotes" element is absent.
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnotes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
// Collect first and remove afterwards, so childNodes is not modified while
// it is being indexed.
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
// Only direct <div> children whose class attribute is exactly "footnote" are removed.
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
// Maps "#<span id>" to the footnote number assigned to that span.
var refs = {};
|
||||
// Running footnote number; stays 0 when no footnote span is found.
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// The note text is cached in data-note on the first pass, so later passes
// (after the span's markup has been replaced by a link) can still read it.
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
// NOTE(review): match() returns null when the span holds no "[...]" text,
// so the [1] index below would throw in that case - confirm inputs always match.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
// Append the numbered entry, linked back to the in-text reference.
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
// Remember the number for spans that carry an id, for the footnoteref pass below.
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Keep only the fragment, because IE returns the full URL.
|
||||
// n is reused here as the number looked up for this reference.
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="article">
|
||||
<div id="header">
|
||||
<h1>CNTRAINING(1)</h1>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_name">NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>cntraining - character normalization training for Tesseract</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>cntraining</strong> [-D <em>dir</em>] <em>FILE</em>…</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>cntraining takes a list of .tr files, from which it generates the
|
||||
<strong>normproto</strong> data file (the character normalization sensitivity
|
||||
prototypes).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
-D <em>dir</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Directory to write output files to.
|
||||
</p>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), shapeclustering(1), mftraining(1)</p></div>
|
||||
<div class="paragraph"><p><a href="https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html">https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (c) Hewlett-Packard Company, 1988
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,888 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>COMBINE_LANG_MODEL(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
// Build (or rebuild) the footnote list inside the element with id "footnotes"
// from every <span class="footnote"> found under the element with id "content",
// then rewrite <span class="footnoteref"> elements to point at the numbered notes.
// Takes no arguments and returns nothing; it only mutates the DOM.
// Does nothing at all when the "footnotes" element is absent.
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnotes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
// Collect first and remove afterwards, so childNodes is not modified while
// it is being indexed.
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
// Only direct <div> children whose class attribute is exactly "footnote" are removed.
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
// Maps "#<span id>" to the footnote number assigned to that span.
var refs = {};
|
||||
// Running footnote number; stays 0 when no footnote span is found.
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// The note text is cached in data-note on the first pass, so later passes
// (after the span's markup has been replaced by a link) can still read it.
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
// NOTE(review): match() returns null when the span holds no "[...]" text,
// so the [1] index below would throw in that case - confirm inputs always match.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
// Append the numbered entry, linked back to the in-text reference.
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
// Remember the number for spans that carry an id, for the footnoteref pass below.
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Keep only the fragment, because IE returns the full URL.
|
||||
// n is reused here as the number looked up for this reference.
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
COMBINE_LANG_MODEL(1) Manual Page
|
||||
</h1>
|
||||
<h2>NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<p>combine_lang_model -
|
||||
generate starter traineddata
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>combine_lang_model</strong> --input_unicharset <em>filename</em> --script_dir <em>dirname</em> --output_dir <em>rootdir</em> --lang <em>lang</em> [--lang_is_rtl] [--pass_through_recoder] [--words file --puncs file --numbers file]</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>combine_lang_model(1) generates a starter traineddata file that can be used to train an LSTM-based neural network model. It takes as input a unicharset and an optional set of wordlists. It eliminates the need to run set_unicharset_properties(1), wordlist2dawg(1), some non-existent binary to generate the recoder (unicode compressor), and finally combine_tessdata(1).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
<em>--lang lang</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
The language to use.
|
||||
Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--script_dir PATH</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Directory name for input script unicharsets. It should point to the location of langdata (github repo) directory. (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--input_unicharset FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Unicharset to complete and use in encoding. It can be a hand-created file with incomplete fields. Its basic and script properties will be set before it is used. (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--lang_is_rtl BOOL</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
True if language being processed is written right-to-left (e.g. Arabic/Hebrew). (type:bool default:false)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--pass_through_recoder BOOL</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
If true, the recoder is a simple pass-through of the unicharset. Otherwise, potentially a compression of it by encoding Hangul in Jamos, decomposing multi-unicode symbols into sequences of unicodes, and encoding Han using the data in the radical_table_data, which must be the content of the file: langdata/radical-stroke.txt. (type:bool default:false)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--version_str STRING</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
An arbitrary version label to add to traineddata file (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--words FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Optional) File listing words to use for the system dictionary (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--numbers FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Optional) File listing number patterns (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--puncs FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Optional) File listing punctuation patterns. The words/puncs/numbers lists may be all empty. If any are non-empty then puncs must be non-empty. (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--output_dir PATH</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Root directory for output files. Output files will be written to <output_dir>/<lang>/<lang>.* (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_history">HISTORY</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>combine_lang_model(1) was first made available for tesseract 4.00.00alpha.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_resources">RESOURCES</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br>
|
||||
Information on training tesseract LSTM: <a href="https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html">https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
|
@ -1,802 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>DAWG2WORDLIST(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
||||
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
DAWG2WORDLIST(1) Manual Page
|
||||
</h1>
|
||||
<h2>NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<p>dawg2wordlist -
|
||||
convert a Tesseract DAWG to a wordlist
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>dawg2wordlist</strong> <em>UNICHARSET</em> <em>DAWG</em> <em>WORDLIST</em></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word
|
||||
Graph (DAWG) to a list of words using a unicharset as key.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><em>UNICHARSET</em>
|
||||
The unicharset of the language. This is the unicharset
|
||||
generated by mftraining(1).</p></div>
|
||||
<div class="paragraph"><p><em>DAWG</em>
|
||||
The input DAWG, created by wordlist2dawg(1)</p></div>
|
||||
<div class="paragraph"><p><em>WORDLIST</em>
|
||||
Plain text (output) file in UTF-8, one word per line</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
|
||||
combine_tessdata(1)</p></div>
|
||||
<div class="paragraph"><p><a href="https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html">https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,51 +0,0 @@
|
|||
Ray Smith (lead developer) <theraysmith@gmail.com>
|
||||
Ahmad Abdulkader
|
||||
Rika Antonova
|
||||
Nicholas Beato
|
||||
Jeff Breidenbach
|
||||
Samuel Charron
|
||||
Phil Cheatle
|
||||
Simon Crouch
|
||||
David Eger
|
||||
Sheelagh Huddleston
|
||||
Dan Johnson
|
||||
Rajesh Katikam
|
||||
Thomas Kielbus
|
||||
Dar-Shyang Lee
|
||||
Zongyi (Joe) Liu
|
||||
Robert Moss
|
||||
Chris Newton
|
||||
Michael Reimer
|
||||
Marius Renn
|
||||
Raquel Romano
|
||||
Christy Russon
|
||||
Shobhit Saxena
|
||||
Mark Seaman
|
||||
Faisal Shafait
|
||||
Hiroshi Takenaka
|
||||
Ranjith Unnikrishnan
|
||||
Joern Wanke
|
||||
Ping Ping Xiu
|
||||
Andrew Ziem
|
||||
Oscar Zuniga
|
||||
|
||||
Community Contributors:
|
||||
Zdenko Podobný (Maintainer)
|
||||
Jim Regan (Maintainer)
|
||||
James R Barlow
|
||||
Stefan Brechtken
|
||||
Thomas Breuel
|
||||
Amit Dovev
|
||||
Martin Ettl
|
||||
Shree Devi Kumar
|
||||
Noah Metzger
|
||||
Tom Morris
|
||||
Tobias Müller
|
||||
Egor Pugin
|
||||
Robert Sachunsky
|
||||
Raf Schietekat
|
||||
Sundar M. Vaidya
|
||||
Robin Watts
|
||||
Stefan Weil
|
||||
Nick White
|
||||
Alexander Zaitsev
|
||||
|
|
@ -1,202 +0,0 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
# Tesseract OCR
|
||||
|
||||
[AppVeyor build status](https://ci.appveyor.com/project/zdenop/tesseract/)
|
||||
[GitHub Actions build status](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml)\
|
||||
[Coverity Scan status](https://scan.coverity.com/projects/tesseract-ocr)
|
||||
[Code scanning alerts](https://github.com/tesseract-ocr/tesseract/security/code-scanning)
|
||||
[OSS-Fuzz issues](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=2&q=proj:tesseract-ocr)
|
||||
\
|
||||
[License](https://raw.githubusercontent.com/tesseract-ocr/tesseract/main/LICENSE)
|
||||
[Releases](https://github.com/tesseract-ocr/tesseract/releases/)
|
||||
|
||||
## Table of Contents
|
||||
|
||||
* [Tesseract OCR](#tesseract-ocr)
|
||||
* [About](#about)
|
||||
* [Brief history](#brief-history)
|
||||
* [Installing Tesseract](#installing-tesseract)
|
||||
* [Running Tesseract](#running-tesseract)
|
||||
* [For developers](#for-developers)
|
||||
* [Support](#support)
|
||||
* [License](#license)
|
||||
* [Dependencies](#dependencies)
|
||||
* [Latest Version of README](#latest-version-of-readme)
|
||||
|
||||
## About
|
||||
|
||||
This package contains an **OCR engine** - `libtesseract` and a **command line program** - `tesseract`.
|
||||
|
||||
Tesseract 4 adds a new neural net (LSTM) based [OCR engine](https://en.wikipedia.org/wiki/Optical_character_recognition) which is focused on line recognition, but also still supports the legacy Tesseract OCR engine of Tesseract 3 which works by recognizing character patterns. Compatibility with Tesseract 3 is enabled by using the Legacy OCR Engine mode (--oem 0).
|
||||
It also needs [traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.html) files which support the legacy engine, for example those from the [tessdata](https://github.com/tesseract-ocr/tessdata) repository.
|
||||
|
||||
Stefan Weil is the current lead developer. Ray Smith was the lead developer until 2018. The maintainer is Zdenko Podobny. For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/main/AUTHORS)
|
||||
and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors).
|
||||
|
||||
Tesseract has **unicode (UTF-8) support**, and can **recognize [more than 100 languages](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html)** "out of the box".
|
||||
|
||||
Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF.
|
||||
|
||||
Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV, ALTO and PAGE.
|
||||
|
||||
You should note that in many cases, in order to get better OCR results, you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract.
|
||||
|
||||
This project **does not include a GUI application**. If you need one, please see the [3rdParty](https://tesseract-ocr.github.io/tessdoc/User-Projects-%E2%80%93-3rdParty.html) documentation.
|
||||
|
||||
Tesseract **can be trained to recognize other languages**.
|
||||
See [Tesseract Training](https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html) for more information.
|
||||
|
||||
## Brief history
|
||||
|
||||
Tesseract was originally developed at Hewlett-Packard Laboratories Bristol UK and at Hewlett-Packard Co, Greeley Colorado USA between 1985 and 1994, with some more changes made in 1996 to port to Windows, and some C++izing in 1998. In 2005 Tesseract was open sourced by HP. From 2006 until November 2018 it was developed by Google.
|
||||
|
||||
Major version 5 is the current stable version and started with release
|
||||
[5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0) on November 30, 2021. Newer minor versions and bugfix versions are available from
|
||||
[GitHub](https://github.com/tesseract-ocr/tesseract/releases/).
|
||||
|
||||
Latest source code is available from [main branch on GitHub](https://github.com/tesseract-ocr/tesseract/tree/main).
|
||||
Open issues can be found in [issue tracker](https://github.com/tesseract-ocr/tesseract/issues),
|
||||
and [planning documentation](https://tesseract-ocr.github.io/tessdoc/Planning.html).
|
||||
|
||||
See **[Release Notes](https://tesseract-ocr.github.io/tessdoc/ReleaseNotes.html)**
|
||||
and **[Change Log](https://github.com/tesseract-ocr/tesseract/blob/main/ChangeLog)** for more details of the releases.
|
||||
|
||||
## Installing Tesseract
|
||||
|
||||
You can either [Install Tesseract via pre-built binary package](https://tesseract-ocr.github.io/tessdoc/Installation.html)
|
||||
or [build it from source](https://tesseract-ocr.github.io/tessdoc/Compiling.html).
|
||||
|
||||
Before building Tesseract from source, please check that your system has a compiler which is one of the [supported compilers](https://tesseract-ocr.github.io/tessdoc/supported-compilers.html).
|
||||
|
||||
## Running Tesseract
|
||||
|
||||
Basic **[command line usage](https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html)**:
|
||||
|
||||
tesseract imagename outputbase [-l lang] [--oem ocrenginemode] [--psm pagesegmode] [configfiles...]
|
||||
|
||||
For more information about the various command line options use `tesseract --help` or `man tesseract`.
|
||||
|
||||
Examples can be found in the [documentation](https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html#simplest-invocation-to-ocr-an-image).
|
||||
|
||||
## For developers
|
||||
|
||||
Developers can use `libtesseract` [C](https://github.com/tesseract-ocr/tesseract/blob/main/include/tesseract/capi.h) or
|
||||
[C++](https://github.com/tesseract-ocr/tesseract/blob/main/include/tesseract/baseapi.h) API to build their own application. If you need bindings to `libtesseract` for other programming languages, please see the
|
||||
[wrapper](https://tesseract-ocr.github.io/tessdoc/AddOns.html#tesseract-wrappers) section in the AddOns documentation.
|
||||
|
||||
Documentation of Tesseract generated from source code by doxygen can be found on [tesseract-ocr.github.io](https://tesseract-ocr.github.io/).
|
||||
|
||||
## Support
|
||||
|
||||
Before you submit an issue, please review **[the guidelines for this repository](https://github.com/tesseract-ocr/tesseract/blob/main/CONTRIBUTING.md)**.
|
||||
|
||||
For support, first read the [documentation](https://tesseract-ocr.github.io/tessdoc/),
|
||||
particularly the [FAQ](https://tesseract-ocr.github.io/tessdoc/FAQ.html) to see if your problem is addressed there.
|
||||
If not, search the [Tesseract user forum](https://groups.google.com/g/tesseract-ocr), the [Tesseract developer forum](https://groups.google.com/g/tesseract-dev) and [past issues](https://github.com/tesseract-ocr/tesseract/issues), and if you still can't find what you need, ask for support in the mailing-lists.
|
||||
|
||||
Mailing-lists:
|
||||
|
||||
* [tesseract-ocr](https://groups.google.com/g/tesseract-ocr) - For tesseract users.
|
||||
* [tesseract-dev](https://groups.google.com/g/tesseract-dev) - For tesseract developers.
|
||||
|
||||
Please report an issue only for a **bug**, not for asking questions.
|
||||
|
||||
## License
|
||||
|
||||
The code in this repository is licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
**NOTE**: This software depends on other packages that may be licensed under different open source licenses.
|
||||
|
||||
Tesseract uses [Leptonica library](http://leptonica.com/) which essentially
|
||||
uses a [BSD 2-clause license](http://leptonica.com/about-the-license.html).
|
||||
|
||||
## Dependencies
|
||||
|
||||
Tesseract uses [Leptonica library](https://github.com/DanBloomberg/leptonica)
|
||||
for opening input images (i.e. not documents like PDF).
|
||||
It is suggested to use leptonica with built-in support for [zlib](https://zlib.net),
|
||||
[png](https://sourceforge.net/projects/libpng) and
|
||||
[tiff](http://www.simplesystems.org/libtiff) (for multipage tiff).
|
||||
|
||||
## Latest Version of README
|
||||
|
||||
For the latest online version of the README.md see:
|
||||
|
||||
<https://github.com/tesseract-ocr/tesseract/blob/main/README.md>
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -1,847 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>LSTMEVAL(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnotes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
||||
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
LSTMEVAL(1) Manual Page
|
||||
</h1>
|
||||
<h2>NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<p>lstmeval -
|
||||
Evaluation program for LSTM-based networks.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>lstmeval</strong> --model <em>lang.lstm|modelname_checkpoint|modelname_N.NN_NN_NN.checkpoint</em> [--traineddata lang/lang.traineddata] --eval_listfile <em>lang.eval_files.txt</em> [--verbosity N] [--max_image_MB NNNN]</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>lstmeval(1) evaluates LSTM-based networks. Either a recognition model or a training checkpoint can be given as input for evaluation along with a list of lstmf files. If evaluating a training checkpoint, <em>--traineddata</em> should also be specified. Intermediate training checkpoints can also be used.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
<em>--model FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Name of model file (training or recognition) (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--traineddata FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
If model is a training checkpoint, then traineddata must be the traineddata file that was given to the trainer (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--eval_listfile FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
File listing sample files in lstmf training format. (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--max_image_MB INT</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Max memory to use for images. (type:int default:2000)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--verbosity INT</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Amount of diagnostic information to output (0-2). (type:int default:1)
|
||||
</p>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_history">HISTORY</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>lstmeval(1) was first made available for tesseract4.00.00alpha.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_resources">RESOURCES</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br>
|
||||
Information on training tesseract LSTM: <a href="https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html">https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,999 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>LSTMTRAINING(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnotes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Because IE returns the full URL.
|
||||
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
LSTMTRAINING(1) Manual Page
|
||||
</h1>
|
||||
<h2>NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<p>lstmtraining -
|
||||
Training program for LSTM-based networks.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>lstmtraining</strong>
|
||||
--continue_from <em>train_output_dir/continue_from_lang.lstm</em>
|
||||
--old_traineddata <em>bestdata_dir/continue_from_lang.traineddata</em>
|
||||
--traineddata <em>train_output_dir/lang/lang.traineddata</em>
|
||||
--max_iterations <em>NNN</em>
|
||||
--debug_interval <em>0|-1</em>
|
||||
--train_listfile <em>train_output_dir/lang.training_files.txt</em>
|
||||
--model_output <em>train_output_dir/newlstmmodel</em></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>lstmtraining(1) trains LSTM-based networks using a list of lstmf files and a starter traineddata file as the main input. Training from scratch is not recommended for users. Fine-tuning (example command shown in the synopsis above) or replacing a layer can be used instead. Different options apply to different types of training.
|
||||
Read the <a href="https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html">training documentation</a> for details.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
'--debug_interval '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
How often to display the alignment. (type:int default:0)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--net_mode '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Controls network behavior. (type:int default:192)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--perfect_sample_delay '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
How many imperfect samples between perfect ones. (type:int default:0)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--max_image_MB '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Max memory to use for images. (type:int default:6000)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--append_index '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Index in continue_from Network at which to attach the new network defined by net_spec (type:int default:-1)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--max_iterations '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
If set, exit after this many iterations. A negative value is interpreted as epochs, 0 means infinite iterations. (type:int default:0)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--target_error_rate '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Final error rate in percent. (type:double default:0.01)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--weight_range '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Range of initial random weights. (type:double default:0.1)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--learning_rate '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Weight factor for new deltas. (type:double default:0.001)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--momentum '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Decay factor for repeating deltas. (type:double default:0.5)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--adam_beta '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Decay factor for repeating deltas. (type:double default:0.999)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--stop_training '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Just convert the training model to a runtime model. (type:bool default:false)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--convert_to_int '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Convert the recognition model to an integer model. (type:bool default:false)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--sequential_training '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Use the training files sequentially instead of round-robin. (type:bool default:false)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--debug_network '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Get info on distribution of weight values (type:bool default:false)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--randomly_rotate '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Train OSD and randomly turn training samples upside-down (type:bool default:false)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--net_spec '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Network specification (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--continue_from '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Existing model to extend (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--model_output '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Basename for output models (type:string default:lstmtrain)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--train_listfile '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
File listing training files in lstmf training format. (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--eval_listfile '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
File listing eval files in lstmf training format. (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--traineddata '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Starter traineddata with combined Dawgs/Unicharset/Recoder for language model (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
'--old_traineddata '
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
When changing the character set, this specifies the traineddata with the old character set that is to be replaced (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_history">HISTORY</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>lstmtraining(1) was first made available for tesseract4.00.00alpha.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_resources">RESOURCES</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br>
|
||||
Information on training tesseract LSTM: <a href="https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html">https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,833 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>MERGE_UNICHARSETS(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
||||
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
MERGE_UNICHARSETS(1) Manual Page
|
||||
</h1>
|
||||
<h2>NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<p>merge_unicharsets -
|
||||
Simple tool to merge two or more unicharsets.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>merge_unicharsets</strong> <em>unicharset-in-1</em> … <em>unicharset-in-n</em> <em>unicharset-out</em></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>merge_unicharsets(1) is a simple tool to merge two or more unicharsets.
|
||||
It could be used to create a combined unicharset for a script-level engine,
|
||||
like the new Latin or Devanagari.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_in_out_arguments">IN/OUT ARGUMENTS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
<em>unicharset-in-1</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) The name of the first unicharset file to be merged.
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>unicharset-in-n</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) The name of the nth unicharset file to be merged.
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>unicharset-out</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Output) The name of the merged unicharset file.
|
||||
</p>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_history">HISTORY</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>merge_unicharsets(1) was first made available for tesseract4.00.00alpha.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_resources">RESOURCES</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br>
|
||||
Information on training tesseract LSTM: <a href="https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html">https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,847 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>MFTRAINING(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
||||
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
MFTRAINING(1) Manual Page
|
||||
</h1>
|
||||
<h2>NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<p>mftraining -
|
||||
feature training for Tesseract
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>mftraining -U <em>unicharset</em> -O <em>lang.unicharset</em> <em>FILE</em>…</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>mftraining takes a list of .tr files, from which it generates the
|
||||
files <strong>inttemp</strong> (the shape prototypes), <strong>shapetable</strong>, and <strong>pffmtable</strong>
|
||||
(the number of expected features for each character). (A fourth file
|
||||
called Microfeat is also written by this program, but it is not used.)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
-U <em>FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) The unicharset generated by unicharset_extractor(1)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-F <em>font_properties_file</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1:
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content monospaced">
|
||||
<pre>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-X <em>xheights_file</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content monospaced">
|
||||
<pre>*font_name* *xheight*</pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-D <em>dir</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Directory to write output files to.
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-O <em>FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Output) The output unicharset that will be given to combine_tessdata(1)
|
||||
</p>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
|
||||
shapeclustering(1), unicharset(5)</p></div>
|
||||
<div class="paragraph"><p><a href="https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html">https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) Hewlett-Packard Company, 1988
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,831 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>SET_UNICHARSET_PROPERTIES(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
||||
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
SET_UNICHARSET_PROPERTIES(1) Manual Page
|
||||
</h1>
|
||||
<h2>NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<p>set_unicharset_properties -
|
||||
set properties about the unichars
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>set_unicharset_properties</strong> --U <em>input_unicharsetfile</em> --script_dir <em>/path/to/langdata</em> --O <em>output_unicharsetfile</em></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>set_unicharset_properties(1) reads a unicharset file, puts the result in a UNICHARSET object, fills it with properties about the unichars it contains and writes the result back to another unicharset file.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
<em>--script_dir /path/to/langdata</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) Specify the location of directory for universal script unicharsets and font xheights (type:string default:)
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--U unicharsetfile</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) Specify the location of the unicharset to load as input.
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
<em>--O unicharsetfile</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Output) Specify the location of the unicharset to be written with updated properties.
|
||||
</p>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_history">HISTORY</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>set_unicharset_properties(1) was first made available for tesseract version 3.03.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_resources">RESOURCES</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br>
|
||||
Information on training: <a href="https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html">https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,850 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="generator" content="AsciiDoc 10.2.0">
|
||||
<title>SHAPECLUSTERING(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em 5% 1em 5%;
|
||||
}
|
||||
|
||||
a {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
a:visited {
|
||||
color: fuchsia;
|
||||
}
|
||||
|
||||
em {
|
||||
font-style: italic;
|
||||
color: navy;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
h1, h2, h3 {
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
padding-top: 0.5em;
|
||||
}
|
||||
h3 {
|
||||
float: left;
|
||||
}
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 1px solid silver;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
#email {
|
||||
}
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.admonitionblock {
|
||||
margin-top: 2.0em;
|
||||
margin-bottom: 2.0em;
|
||||
margin-right: 10%;
|
||||
color: #606060;
|
||||
}
|
||||
|
||||
div.content { /* Block element content. */
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
div.title + * {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
td div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content div.title:first-child {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
div.content + div.title {
|
||||
margin-top: 0.0em;
|
||||
}
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
||||
div.verseblock + div.attribution {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.admonitionblock .icon {
|
||||
vertical-align: top;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
text-decoration: underline;
|
||||
color: #527bbd;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
div.admonitionblock td.content {
|
||||
padding-left: 0.5em;
|
||||
border-left: 3px solid #dddddd;
|
||||
}
|
||||
|
||||
div.exampleblock > div.content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
dt {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0;
|
||||
font-style: normal;
|
||||
color: navy;
|
||||
}
|
||||
dd > *:first-child {
|
||||
margin-top: 0.1em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
list-style-position: outside;
|
||||
}
|
||||
ol.arabic {
|
||||
list-style-type: decimal;
|
||||
}
|
||||
ol.loweralpha {
|
||||
list-style-type: lower-alpha;
|
||||
}
|
||||
ol.upperalpha {
|
||||
list-style-type: upper-alpha;
|
||||
}
|
||||
ol.lowerroman {
|
||||
list-style-type: lower-roman;
|
||||
}
|
||||
ol.upperroman {
|
||||
list-style-type: upper-roman;
|
||||
}
|
||||
|
||||
div.compact ul, div.compact ol,
|
||||
div.compact p, div.compact p,
|
||||
div.compact div, div.compact div {
|
||||
margin-top: 0.1em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
div.hdlist tr {
|
||||
padding-bottom: 15px;
|
||||
}
|
||||
dt.hdlist1.strong, td.hdlist1.strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
td.hdlist1 {
|
||||
vertical-align: top;
|
||||
font-style: normal;
|
||||
padding-right: 0.8em;
|
||||
color: navy;
|
||||
}
|
||||
td.hdlist2 {
|
||||
vertical-align: top;
|
||||
}
|
||||
div.hdlist.compact tr {
|
||||
margin: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.comment {
|
||||
background: yellow;
|
||||
}
|
||||
|
||||
.footnote, .footnoteref {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
span.footnote, span.footnoteref {
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
#footnotes {
|
||||
margin: 20px 0 20px 0;
|
||||
padding: 7px 0 0 0;
|
||||
}
|
||||
|
||||
#footnotes div.footnote {
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
#footnotes hr {
|
||||
border: none;
|
||||
border-top: 1px solid silver;
|
||||
height: 1px;
|
||||
text-align: left;
|
||||
margin-left: 0;
|
||||
width: 20%;
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
div.toclevel2 {
|
||||
margin-left: 2em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel3 {
|
||||
margin-left: 4em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
var iterate = function (el) {
|
||||
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
||||
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
||||
var mo = re.exec(i.tagName);
|
||||
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
||||
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
||||
}
|
||||
iterate(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
iterate(el);
|
||||
return result;
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
if (entry.element.id == "")
|
||||
entry.element.id = "_toc_" + i;
|
||||
var a = document.createElement("a");
|
||||
a.href = "#" + entry.element.id;
|
||||
a.appendChild(document.createTextNode(entry.text));
|
||||
var div = document.createElement("div");
|
||||
div.appendChild(a);
|
||||
div.className = "toclevel" + entry.toclevel;
|
||||
toc.appendChild(div);
|
||||
}
|
||||
if (entries.length == 0)
|
||||
toc.parentNode.removeChild(toc);
|
||||
},
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Footnotes generator
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Based on footnote generation code from:
|
||||
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
}
|
||||
if (n == 0)
|
||||
noteholder.parentNode.removeChild(noteholder);
|
||||
else {
|
||||
// Process footnoterefs.
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnoteref") {
|
||||
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
||||
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
||||
n = refs[href];
|
||||
spans[i].innerHTML =
|
||||
"[<a href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
SHAPECLUSTERING(1) Manual Page
|
||||
</h1>
|
||||
<h2>NAME</h2>
|
||||
<div class="sectionbody">
|
||||
<p>shapeclustering -
|
||||
shape clustering training for Tesseract
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>shapeclustering -D <em>output_dir</em>
|
||||
-U <em>unicharset</em> -O <em>mfunicharset</em>
|
||||
-F <em>font_props</em> -X <em>xheights</em>
|
||||
<em>FILE</em>…</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>shapeclustering(1) takes extracted feature .tr files (generated by
|
||||
tesseract(1) run in a special mode from box files) and produces a
|
||||
file <strong>shapetable</strong> and an enhanced unicharset. This program is still
|
||||
experimental, and is not required (yet) for training Tesseract.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
-U <em>FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
The unicharset generated by unicharset_extractor(1).
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-D <em>dir</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
Directory to write output files to.
|
||||
</p>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-F <em>font_properties_file</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) font properties file, where each line is of the following form, where each field other than the font name is 0 or 1:
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content monospaced">
|
||||
<pre>'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-X <em>xheights_file</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
(Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content monospaced">
|
||||
<pre>'font_name' 'xheight'</pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
-O <em>FILE</em>
|
||||
</dt>
|
||||
<dd>
|
||||
<p>
|
||||
The output unicharset that will be given to combine_tessdata(1).
|
||||
</p>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
|
||||
unicharset(5)</p></div>
|
||||
<div class="paragraph"><p><a href="https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html">https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) Google, 2011
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated
|
||||
2024-05-19 13:04:22 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -1 +0,0 @@
|
|||
tessedit_create_alto 1
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
tessedit_ambigs_training 1
|
||||
load_freq_dawg 0
|
||||
load_punc_dawg 0
|
||||
load_system_dawg 0
|
||||
load_number_dawg 0
|
||||
ambigs_debug_level 3
|
||||
load_fixed_length_dawgs 0
|
||||
|
|
@ -1 +0,0 @@
|
|||
tessedit_zero_rejection T
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
load_bigram_dawg True
|
||||
tessedit_enable_bigram_correction True
|
||||
tessedit_bigram_debug 3
|
||||
save_raw_choices True
|
||||
save_alt_choices True
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
disable_character_fragments T
|
||||
file_type .bl
|
||||
textord_fast_pitch_test T
|
||||
tessedit_zero_rejection T
|
||||
tessedit_minimal_rejection F
|
||||
tessedit_write_rep_codes F
|
||||
edges_children_fix F
|
||||
edges_childarea 0.65
|
||||
edges_boxarea 0.9
|
||||
tessedit_resegment_from_boxes T
|
||||
tessedit_train_from_boxes T
|
||||
textord_no_rejects T
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue