Refactor config file, remove old unused stuff

2025-10-13 01:58:33 +02:00
parent f5dc30dc9b
commit 254073bbaa
3 changed files with 129 additions and 54 deletions
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -30,8 +30,6 @@ parser.add_argument('-es', '--engine_secondary', type=str, default=argparse.SUPP
                    help='OCR engine to use for two-pass processing.')
 parser.add_argument('-p', '--pause_at_startup', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
                    help='Pause at startup.')
-parser.add_argument('-i', '--ignore_flag', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
-                    help='Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).')
 parser.add_argument('-d', '--delete_images', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
                    help='Delete image files after processing when reading from a directory.')
 parser.add_argument('-n', '--notifications', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
@@ -82,7 +80,6 @@ class Config:
        'engine_secondary': '',
        'pause_at_startup': False,
        'auto_pause' : 0,
-        'ignore_flag': False,
        'delete_images': False,
        'engines': [],
        'logger_format': '<green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>',
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -49,7 +49,7 @@ try:
    from Quartz import CGWindowListCreateImageFromArray, kCGWindowImageBoundsIgnoreFraming, CGRectMake, CGRectNull, CGMainDisplayID, CGWindowListCopyWindowInfo, \
                       CGWindowListCreateDescriptionFromArray, kCGWindowListOptionOnScreenOnly, kCGWindowListExcludeDesktopElements, kCGWindowName, kCGNullWindowID, \
                       CGImageGetWidth, CGImageGetHeight, CGDataProviderCopyData, CGImageGetDataProvider, CGImageGetBytesPerRow, kCGWindowImageNominalResolution
-    from ScreenCaptureKit import SCContentFilter, SCScreenshotManager, SCShareableContent, SCStreamConfiguration, SCCaptureResolutionBest
+    from ScreenCaptureKit import SCContentFilter, SCScreenshotManager, SCShareableContent, SCStreamConfiguration, SCCaptureResolutionNominal
 except ImportError:
    pass

@@ -57,7 +57,6 @@ except ImportError:
 class ClipboardThread(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)
-        self.ignore_flag = config.get_general('ignore_flag')
        self.delay_secs = config.get_general('delay_secs')
        self.last_update = time.time()

@@ -113,12 +112,8 @@ class ClipboardThread(threading.Thread):
                time.sleep(0.1)
            try:
                if win32clipboard.IsClipboardFormatAvailable(win32con.CF_BITMAP) and win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_DIB):
-                    clipboard_text = ''
-                    if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_UNICODETEXT):
-                        clipboard_text = win32clipboard.GetClipboardData(win32clipboard.CF_UNICODETEXT)
-                    if self.ignore_flag or clipboard_text != '*ocr_ignore*':
-                        img = win32clipboard.GetClipboardData(win32clipboard.CF_DIB)
-                        image_queue.put((img, False))
+                    img = win32clipboard.GetClipboardData(win32clipboard.CF_DIB)
+                    image_queue.put((img, False))
                win32clipboard.CloseClipboard()
            except pywintypes.error:
                pass
@@ -142,7 +137,7 @@ class ClipboardThread(threading.Thread):
        else:
            is_macos = sys.platform == 'darwin'
            if is_macos:
-                from AppKit import NSPasteboard, NSPasteboardTypeTIFF, NSPasteboardTypeString
+                from AppKit import NSPasteboard, NSPasteboardTypeTIFF
                pasteboard = NSPasteboard.generalPasteboard()
                count = pasteboard.changeCount()
            else:
@@ -164,12 +159,8 @@ class ClipboardThread(threading.Thread):
                                while len(pasteboard.types()) == 0:
                                    time.sleep(0.1)
                                if NSPasteboardTypeTIFF in pasteboard.types():
-                                    clipboard_text = ''
-                                    if NSPasteboardTypeString in pasteboard.types():
-                                        clipboard_text = pasteboard.stringForType_(NSPasteboardTypeString)
-                                    if self.ignore_flag or clipboard_text != '*ocr_ignore*':
-                                        img = self.normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF))
-                                        image_queue.put((img, False))
+                                    img = self.normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF))
+                                    image_queue.put((img, False))
                    else:
                        old_img = img
                        try:
@@ -178,7 +169,6 @@ class ClipboardThread(threading.Thread):
                            pass
                        else:
                            if (process_clipboard and isinstance(img, Image.Image) and \
-                                (self.ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and \
                                (not self.are_images_identical(img, old_img))):
                                image_queue.put((img, False))

@@ -943,15 +933,14 @@ class ScreenshotThread(threading.Thread):
                content_filter = SCContentFilter.alloc().initWithDesktopIndependentWindow_(target_window)

                frame = content_filter.contentRect()
-                scale = content_filter.pointPixelScale()
-                width = frame.size.width * scale
-                height = frame.size.height * scale
+                width = frame.size.width
+                height = frame.size.height
                configuration = SCStreamConfiguration.alloc().init()
-                configuration.setSourceRect_(CGRectMake(0, 0, frame.size.width, frame.size.height))
+                configuration.setSourceRect_(CGRectMake(0, 0, width, height))
                configuration.setWidth_(width)
                configuration.setHeight_(height)
                configuration.setShowsCursor_(False)
-                configuration.setCaptureResolution_(SCCaptureResolutionBest)
+                configuration.setCaptureResolution_(SCCaptureResolutionNominal)
                configuration.setIgnoreGlobalClipSingleWindow_(True)

                SCScreenshotManager.captureImageWithFilter_configuration_completionHandler_(
--- a/owocr_config.ini
+++ b/owocr_config.ini
@@ -1,61 +1,150 @@
 [general]
-;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
-;engine = glens
-;note: engine to use for two-pass processing.
-;engine_secondary =
+;Where to read input images from. Can be either "clipboard", "websocket",
+;"unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
 ;read_from = clipboard
-;read_from_secondary =
+
+;Optional secondary source to read input images from. Same options as
+;read_from, but they can't both be directory paths.
+;read_from_secondary = 
+
+;Where to save recognized texts to. Can be either "clipboard", "websocket", or
+;a path to a text file.
 ;write_to = clipboard
-;note: this specifies an amount of seconds to wait for auto pausing the program after a successful text recognition. Will be ignored when reading with screen capture. 0 to disable.
-;auto_pause = 0
+
+;OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing",
+;"gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr",
+;"rapidocr", "ocrspace".
+;engine = 
+
+;OCR engine to use for two-pass processing.
+;engine_secondary = 
+
+;Pause at startup.
 ;pause_at_startup = False
-;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
-;engine_color = cyan
-;websocket_port = 7331
-;delay_secs = 0.5
-;notifications = False
-;ignore_flag = False
+
+;Automatically pause the program after the specified number of seconds since
+;the last successful text recognition. Will be ignored when reading with screen
+;capture. 0 to disable.
+;auto_pause = 0
+
+;Delete image files after processing when reading from a directory.
 ;delete_images = False
-;note: terminal window verbosity. Can be -2 (all recognized text is showed whole), -1 (only timestamps are shown), 0 (nothing is shown but errors), or larger than 0 to cut displayed text to that amount of characters.
-;verbosity = -2
-;note: this specifies a combo to wait on for pausing the program. As an example: <ctrl>+<shift>+p. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
-;combo_pause = <ctrl>+<shift>+p
-;note: this specifies a combo to wait on for switching the OCR engine. As an example: <ctrl>+<shift>+a. To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
-;combo_engine_switch = <ctrl>+<shift>+a
-;note: screen_capture_area can be empty for the coordinate picker, "screen_N" (where N is the screen number starting from 1) for an entire screen, have a manual set of coordinates (x,y,width,height) or a window name (the first matching window title will be used).
-;screen_capture_area =
+
+;Available:
+;avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
+;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
+
+;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
+
+;engine_color = cyan
+
+;delay_secs = 0.5
+
+;websocket_port = 7331
+
+;Show an operating system notification with the detected text. Will be ignored
+;when reading with screen capture and periodic screenshots.
+;notifications = False
+
+;Combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p".
+;The list of keys can be found here:
+;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
+;combo_pause = 
+
+;Combo to wait on for switching the OCR engine. As an example:
+;"<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found
+;here:
+;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
+;combo_engine_switch = 
+
+;Area to target when reading with screen capture. Can be either empty
+;(automatic selector), a set of coordinates (x,y,width,height), "screen_N"
+;(captures a whole screen, where N is the screen number starting from 1) or a
+;window name (the first matching window title will be used).
 ;screen_capture_area = screen_1
 ;screen_capture_area = 400,200,1500,600
 ;screen_capture_area = OBS
-;note: if screen_capture_area is a window name, this can be changed to capture inactive windows too.
-;screen_capture_only_active_windows = True
-;note: delay (in seconds) between screenshots when reading with screen capture. -1 to disable periodic screenshots.
+;screen_capture_area = 
+
+;When reading with screen capture, subsection of the selected window. Can be
+;either empty (automatic selector), a set of coordinates (x,y,width,height),
+;or "window" to use the whole window.
+;screen_capture_window_area = window
+
+;Delay (in seconds) between screenshots when reading with screen capture. -1
+;to disable periodic screenshots.
 ;screen_capture_delay_secs = 0
-;note: when reading with screen capture, delay to wait until text is stable before processing it. -1 waits for two OCR results to be the same. 0 to disable.
+
+;When reading with screen capture and screen_capture_area is a window name,
+;only target the window while it's active.
+;screen_capture_only_active_windows = True
+
+;When reading with screen capture, delay to wait until text is stable before
+;processing it. -1 waits for two OCR results to be the same. 0 to disable.
 ;screen_capture_frame_stabilization = -1
-;note: this specifies a combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
-;screen_capture_combo = <ctrl>+<shift>+s
+
+;When reading with screen capture and frame stabilization is on, try to
+;recover missed lines from unstable frames. Can lead to increased glitches.
+;screen_capture_line_recovery = True
+
+;When reading with screen capture, try to filter furigana lines.
+;screen_capture_furigana_filter = True
+
+;When reading with screen capture, combo to wait on for taking a screenshot.
+;If periodic screenshots are also enabled, any screenshot taken this way
+;bypasses the filtering. Example value: "<ctrl>+<shift>+s". The list of keys
+;can be found here:
+;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
+;screen_capture_combo = 
+
+;When reading with screen capture, combo to wait on for invoking the
+;coordinate picker to change the screen/window area. Example value:
+;"<ctrl>+<shift>+c". The list of keys can be found here:
+;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
+;coordinate_selector_combo = 
+
 ;screen_capture_old_macos_api = False
-;note: this specifies the language to use for some engines and text filtering while using "screencapture". Valid values: ja: (Japanese) zh: (Chinese) ko: (Korean) ar: (Arabic) ru: (Russian) el: (Greek) he: (Hebrew) th: (Thai)
-;Any other value will use Latin Extended (for most European languages and English).
+
+;Two-letter language code for filtering screencapture OCR results. E.g. "ja"
+;for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for
+;Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will
+;use Latin Extended (for most European languages and English).
 ;language = ja
-;note: can be "text" (default) or "json" (to include coordinates).
+
+;The output format for OCR results. Can be "text" (default) or "json" (to
+;include coordinates).
 ;output_format = text
+
+;Terminal window verbosity. Can be -2 (all recognized text is shown in full,
+;default), -1 (only timestamps are shown), 0 (nothing is shown but errors), or
+;larger than 0 to cut displayed text to that number of characters.
+;verbosity = -2
+
 ;[winrtocr]
 ;url = http://aaa.xxx.yyy.zzz:8000
+
 ;[oneocr]
 ;url = http://aaa.xxx.yyy.zzz:8001
+
 ;[azure]
 ;api_key = api_key_here
+
 ;endpoint = https://YOURPROJECT.cognitiveservices.azure.com/
+
 ;[mangaocr]
 ;pretrained_model_name_or_path = kha-white/manga-ocr-base
+
 ;force_cpu = False
+
 ;[easyocr]
 ;gpu = True
+
 ;[ocrspace]
 ;api_key = api_key_here
+
 ;engine_version = 2
+
 ;[rapidocr]
 ;high_accuracy_detection = False
+
 ;high_accuracy_recognition = True