From 254073bbaa309d0561d0df785225118d3f0780fc Mon Sep 17 00:00:00 2001 From: AuroraWright Date: Mon, 13 Oct 2025 01:58:33 +0200 Subject: [PATCH] Refactor config file, remove old unused stuff --- owocr/config.py | 3 - owocr/run.py | 31 ++++------ owocr_config.ini | 149 +++++++++++++++++++++++++++++++++++++---------- 3 files changed, 129 insertions(+), 54 deletions(-) diff --git a/owocr/config.py b/owocr/config.py index 62ba78a..41fc690 100644 --- a/owocr/config.py +++ b/owocr/config.py @@ -30,8 +30,6 @@ parser.add_argument('-es', '--engine_secondary', type=str, default=argparse.SUPP help='OCR engine to use for two-pass processing.') parser.add_argument('-p', '--pause_at_startup', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, help='Pause at startup.') -parser.add_argument('-i', '--ignore_flag', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, - help='Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).') parser.add_argument('-d', '--delete_images', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, help='Delete image files after processing when reading from a directory.') parser.add_argument('-n', '--notifications', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, @@ -82,7 +80,6 @@ class Config: 'engine_secondary': '', 'pause_at_startup': False, 'auto_pause' : 0, - 'ignore_flag': False, 'delete_images': False, 'engines': [], 'logger_format': '{time:HH:mm:ss.SSS} | {message}', diff --git a/owocr/run.py b/owocr/run.py index 478748d..87a3146 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -49,7 +49,7 @@ try: from Quartz import CGWindowListCreateImageFromArray, kCGWindowImageBoundsIgnoreFraming, CGRectMake, CGRectNull, CGMainDisplayID, CGWindowListCopyWindowInfo, \ CGWindowListCreateDescriptionFromArray, kCGWindowListOptionOnScreenOnly, kCGWindowListExcludeDesktopElements, kCGWindowName, kCGNullWindowID, \ CGImageGetWidth, CGImageGetHeight, 
CGDataProviderCopyData, CGImageGetDataProvider, CGImageGetBytesPerRow, kCGWindowImageNominalResolution - from ScreenCaptureKit import SCContentFilter, SCScreenshotManager, SCShareableContent, SCStreamConfiguration, SCCaptureResolutionBest + from ScreenCaptureKit import SCContentFilter, SCScreenshotManager, SCShareableContent, SCStreamConfiguration, SCCaptureResolutionNominal except ImportError: pass @@ -57,7 +57,6 @@ except ImportError: class ClipboardThread(threading.Thread): def __init__(self): super().__init__(daemon=True) - self.ignore_flag = config.get_general('ignore_flag') self.delay_secs = config.get_general('delay_secs') self.last_update = time.time() @@ -113,12 +112,8 @@ class ClipboardThread(threading.Thread): time.sleep(0.1) try: if win32clipboard.IsClipboardFormatAvailable(win32con.CF_BITMAP) and win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_DIB): - clipboard_text = '' - if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_UNICODETEXT): - clipboard_text = win32clipboard.GetClipboardData(win32clipboard.CF_UNICODETEXT) - if self.ignore_flag or clipboard_text != '*ocr_ignore*': - img = win32clipboard.GetClipboardData(win32clipboard.CF_DIB) - image_queue.put((img, False)) + img = win32clipboard.GetClipboardData(win32clipboard.CF_DIB) + image_queue.put((img, False)) win32clipboard.CloseClipboard() except pywintypes.error: pass @@ -142,7 +137,7 @@ class ClipboardThread(threading.Thread): else: is_macos = sys.platform == 'darwin' if is_macos: - from AppKit import NSPasteboard, NSPasteboardTypeTIFF, NSPasteboardTypeString + from AppKit import NSPasteboard, NSPasteboardTypeTIFF pasteboard = NSPasteboard.generalPasteboard() count = pasteboard.changeCount() else: @@ -164,12 +159,8 @@ class ClipboardThread(threading.Thread): while len(pasteboard.types()) == 0: time.sleep(0.1) if NSPasteboardTypeTIFF in pasteboard.types(): - clipboard_text = '' - if NSPasteboardTypeString in pasteboard.types(): - clipboard_text = 
pasteboard.stringForType_(NSPasteboardTypeString) - if self.ignore_flag or clipboard_text != '*ocr_ignore*': - img = self.normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF)) - image_queue.put((img, False)) + img = self.normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF)) + image_queue.put((img, False)) else: old_img = img try: @@ -178,7 +169,6 @@ class ClipboardThread(threading.Thread): pass else: if (process_clipboard and isinstance(img, Image.Image) and \ - (self.ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and \ (not self.are_images_identical(img, old_img))): image_queue.put((img, False)) @@ -943,15 +933,14 @@ class ScreenshotThread(threading.Thread): content_filter = SCContentFilter.alloc().initWithDesktopIndependentWindow_(target_window) frame = content_filter.contentRect() - scale = content_filter.pointPixelScale() - width = frame.size.width * scale - height = frame.size.height * scale + width = frame.size.width + height = frame.size.height configuration = SCStreamConfiguration.alloc().init() - configuration.setSourceRect_(CGRectMake(0, 0, frame.size.width, frame.size.height)) + configuration.setSourceRect_(CGRectMake(0, 0, width, height)) configuration.setWidth_(width) configuration.setHeight_(height) configuration.setShowsCursor_(False) - configuration.setCaptureResolution_(SCCaptureResolutionBest) + configuration.setCaptureResolution_(SCCaptureResolutionNominal) configuration.setIgnoreGlobalClipSingleWindow_(True) SCScreenshotManager.captureImageWithFilter_configuration_completionHandler_( diff --git a/owocr_config.ini b/owocr_config.ini index c25de16..1cb2d2c 100644 --- a/owocr_config.ini +++ b/owocr_config.ini @@ -1,61 +1,150 @@ [general] -;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace -;engine = glens -;note: engine to use for two-pass processing. -;engine_secondary = +;Where to read input images from. 
Can be either "clipboard", "websocket", +;"unixsocket" (on macOS/Linux), "screencapture", or a path to a directory. ;read_from = clipboard -;read_from_secondary = + +;Optional secondary source to read input images from. Same options as +;read_from, but they can't both be directory paths. +;read_from_secondary = + +;Where to save recognized texts to. Can be either "clipboard", "websocket", or +;a path to a text file. ;write_to = clipboard -;note: this specifies an amount of seconds to wait for auto pausing the program after a successful text recognition. Will be ignored when reading with screen capture. 0 to disable. -;auto_pause = 0 + +;OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", +;"gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", +;"rapidocr", "ocrspace". +;engine = + +;OCR engine to use for two-pass processing. +;engine_secondary = + +;Pause at startup. ;pause_at_startup = False -;logger_format = {time:HH:mm:ss.SSS} | {message} -;engine_color = cyan -;websocket_port = 7331 -;delay_secs = 0.5 -;notifications = False -;ignore_flag = False + +;Automatically pause the program after the specified amount of seconds since +;the last successful text recognition. Will be ignored when reading with screen +;capture. 0 to disable. +;auto_pause = 0 + +;Delete image files after processing when reading from a directory. ;delete_images = False -;note: terminal window verbosity. Can be -2 (all recognized text is showed whole), -1 (only timestamps are shown), 0 (nothing is shown but errors), or larger than 0 to cut displayed text to that amount of characters. -;verbosity = -2 -;note: this specifies a combo to wait on for pausing the program. As an example: ++p. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key -;combo_pause = ++p -;note: this specifies a combo to wait on for switching the OCR engine. As an example: ++a. To be used with combo_pause. 
The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key -;combo_engine_switch = ++a -;note: screen_capture_area can be empty for the coordinate picker, "screen_N" (where N is the screen number starting from 1) for an entire screen, have a manual set of coordinates (x,y,width,height) or a window name (the first matching window title will be used). -;screen_capture_area = + +;Available: +;avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace +;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace + +;logger_format = {time:HH:mm:ss.SSS} | {message} + +;engine_color = cyan + +;delay_secs = 0.5 + +;websocket_port = 7331 + +;Show an operating system notification with the detected text. Will be ignored +;when reading with screen capture and periodic screenshots. +;notifications = False + +;Combo to wait on for pausing the program. As an example: "++p". +;The list of keys can be found here: +;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key +;combo_pause = + +;Combo to wait on for switching the OCR engine. As an example: +;"++a". To be used with combo_pause. The list of keys can be found +;here: +;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key +;combo_engine_switch = + +;Area to target when reading with screen capture. Can be either empty +;(automatic selector), a set of coordinates (x,y,width,height), "screen_N" +;(captures a whole screen, where N is the screen number starting from 1) or a +;window name (the first matching window title will be used). ;screen_capture_area = screen_1 ;screen_capture_area = 400,200,1500,600 ;screen_capture_area = OBS -;note: if screen_capture_area is a window name, this can be changed to capture inactive windows too. 
-;screen_capture_only_active_windows = True -;note: delay (in seconds) between screenshots when reading with screen capture. -1 to disable periodic screenshots. +;screen_capture_area = + +;If capturing with screen capture, subsection of the selected window. Can be +;either empty (automatic selector), a set of coordinates (x,y,width,height), +;or "window" to use the whole window. +;screen_capture_window_area = window + +;Delay (in seconds) between screenshots when reading with screen capture. -1 +;to disable periodic screenshots. ;screen_capture_delay_secs = 0 -;note: when reading with screen capture, delay to wait until text is stable before processing it. -1 waits for two OCR results to be the same. 0 to disable. + +;When reading with screen capture and screen_capture_area is a window name, +;only target the window while it's active. +;screen_capture_only_active_windows = True + +;When reading with screen capture, delay to wait until text is stable before +;processing it. -1 waits for two OCR results to be the same. 0 to disable. ;screen_capture_frame_stabilization = -1 -;note: this specifies a combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: ++s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key -;screen_capture_combo = ++s + +;When reading with screen capture and frame stabilization is on, try to +;recover missed lines from unstable frames. Can lead to increased glitches. +;screen_capture_line_recovery = True + +;When reading with screen capture, try to filter furigana lines. +;screen_capture_furigana_filter = True + +;When reading with screen capture, combo to wait on for taking a screenshot. +;If periodic screenshots are also enabled, any screenshot taken this way +;bypasses the filtering. Example value: "++s". 
The list of keys +;can be found here: +;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key +;screen_capture_combo = + +;When reading with screen capture, combo to wait on for invoking the +;coordinate picker to change the screen/window area. Example value: +;"++c". The list of keys can be found here: +;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key +;coordinate_selector_combo = + ;screen_capture_old_macos_api = False -;note: this specifies the language to use for some engines and text filtering while using "screencapture". Valid values: ja: (Japanese) zh: (Chinese) ko: (Korean) ar: (Arabic) ru: (Russian) el: (Greek) he: (Hebrew) th: (Thai) -;Any other value will use Latin Extended (for most European languages and English). + +;Two-letter language code for filtering screencapture OCR results. Ex. "ja" +;for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for +;Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will +;use Latin Extended (for most European languages and English). ;language = ja -;note: can be "text" (default) or "json" (to include coordinates). + +;The output format for OCR results. Can be "text" (default) or "json" (to +;include coordinates). ;output_format = text + +;Terminal window verbosity. Can be -2 (all recognized text is shown whole, +;default), -1 (only timestamps are shown), 0 (nothing is shown but errors), or +;larger than 0 to cut displayed text to that amount of characters. 
+;verbosity = -2 + ;[winrtocr] ;url = http://aaa.xxx.yyy.zzz:8000 + ;[oneocr] ;url = http://aaa.xxx.yyy.zzz:8001 + ;[azure] ;api_key = api_key_here + ;endpoint = https://YOURPROJECT.cognitiveservices.azure.com/ + ;[mangaocr] ;pretrained_model_name_or_path = kha-white/manga-ocr-base + ;force_cpu = False + ;[easyocr] ;gpu = True + ;[ocrspace] ;api_key = api_key_here + ;engine_version = 2 + ;[rapidocr] ;high_accuracy_detection = False + ;high_accuracy_recognition = True \ No newline at end of file