Refactor config file, remove old unused stuff

This commit is contained in:
AuroraWright
2025-10-13 01:58:33 +02:00
parent f5dc30dc9b
commit 254073bbaa
3 changed files with 129 additions and 54 deletions

View File

@@ -30,8 +30,6 @@ parser.add_argument('-es', '--engine_secondary', type=str, default=argparse.SUPP
help='OCR engine to use for two-pass processing.')
parser.add_argument('-p', '--pause_at_startup', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
help='Pause at startup.')
parser.add_argument('-i', '--ignore_flag', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
help='Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).')
parser.add_argument('-d', '--delete_images', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
help='Delete image files after processing when reading from a directory.')
parser.add_argument('-n', '--notifications', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
@@ -82,7 +80,6 @@ class Config:
'engine_secondary': '',
'pause_at_startup': False,
'auto_pause' : 0,
'ignore_flag': False,
'delete_images': False,
'engines': [],
'logger_format': '<green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>',

View File

@@ -49,7 +49,7 @@ try:
from Quartz import CGWindowListCreateImageFromArray, kCGWindowImageBoundsIgnoreFraming, CGRectMake, CGRectNull, CGMainDisplayID, CGWindowListCopyWindowInfo, \
CGWindowListCreateDescriptionFromArray, kCGWindowListOptionOnScreenOnly, kCGWindowListExcludeDesktopElements, kCGWindowName, kCGNullWindowID, \
CGImageGetWidth, CGImageGetHeight, CGDataProviderCopyData, CGImageGetDataProvider, CGImageGetBytesPerRow, kCGWindowImageNominalResolution
from ScreenCaptureKit import SCContentFilter, SCScreenshotManager, SCShareableContent, SCStreamConfiguration, SCCaptureResolutionBest
from ScreenCaptureKit import SCContentFilter, SCScreenshotManager, SCShareableContent, SCStreamConfiguration, SCCaptureResolutionNominal
except ImportError:
pass
@@ -57,7 +57,6 @@ except ImportError:
class ClipboardThread(threading.Thread):
def __init__(self):
super().__init__(daemon=True)
self.ignore_flag = config.get_general('ignore_flag')
self.delay_secs = config.get_general('delay_secs')
self.last_update = time.time()
@@ -113,12 +112,8 @@ class ClipboardThread(threading.Thread):
time.sleep(0.1)
try:
if win32clipboard.IsClipboardFormatAvailable(win32con.CF_BITMAP) and win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_DIB):
clipboard_text = ''
if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_UNICODETEXT):
clipboard_text = win32clipboard.GetClipboardData(win32clipboard.CF_UNICODETEXT)
if self.ignore_flag or clipboard_text != '*ocr_ignore*':
img = win32clipboard.GetClipboardData(win32clipboard.CF_DIB)
image_queue.put((img, False))
img = win32clipboard.GetClipboardData(win32clipboard.CF_DIB)
image_queue.put((img, False))
win32clipboard.CloseClipboard()
except pywintypes.error:
pass
@@ -142,7 +137,7 @@ class ClipboardThread(threading.Thread):
else:
is_macos = sys.platform == 'darwin'
if is_macos:
from AppKit import NSPasteboard, NSPasteboardTypeTIFF, NSPasteboardTypeString
from AppKit import NSPasteboard, NSPasteboardTypeTIFF
pasteboard = NSPasteboard.generalPasteboard()
count = pasteboard.changeCount()
else:
@@ -164,12 +159,8 @@ class ClipboardThread(threading.Thread):
while len(pasteboard.types()) == 0:
time.sleep(0.1)
if NSPasteboardTypeTIFF in pasteboard.types():
clipboard_text = ''
if NSPasteboardTypeString in pasteboard.types():
clipboard_text = pasteboard.stringForType_(NSPasteboardTypeString)
if self.ignore_flag or clipboard_text != '*ocr_ignore*':
img = self.normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF))
image_queue.put((img, False))
img = self.normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF))
image_queue.put((img, False))
else:
old_img = img
try:
@@ -178,7 +169,6 @@ class ClipboardThread(threading.Thread):
pass
else:
if (process_clipboard and isinstance(img, Image.Image) and \
(self.ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and \
(not self.are_images_identical(img, old_img))):
image_queue.put((img, False))
@@ -943,15 +933,14 @@ class ScreenshotThread(threading.Thread):
content_filter = SCContentFilter.alloc().initWithDesktopIndependentWindow_(target_window)
frame = content_filter.contentRect()
scale = content_filter.pointPixelScale()
width = frame.size.width * scale
height = frame.size.height * scale
width = frame.size.width
height = frame.size.height
configuration = SCStreamConfiguration.alloc().init()
configuration.setSourceRect_(CGRectMake(0, 0, frame.size.width, frame.size.height))
configuration.setSourceRect_(CGRectMake(0, 0, width, height))
configuration.setWidth_(width)
configuration.setHeight_(height)
configuration.setShowsCursor_(False)
configuration.setCaptureResolution_(SCCaptureResolutionBest)
configuration.setCaptureResolution_(SCCaptureResolutionNominal)
configuration.setIgnoreGlobalClipSingleWindow_(True)
SCScreenshotManager.captureImageWithFilter_configuration_completionHandler_(

View File

@@ -1,61 +1,150 @@
[general]
;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
;engine = glens
;note: engine to use for two-pass processing.
;engine_secondary =
;Where to read input images from. Can be either "clipboard", "websocket",
;"unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
;read_from = clipboard
;read_from_secondary =
;Optional secondary source to read input images from. Same options as
;read_from, but they can't both be directory paths.
;read_from_secondary =
;Where to save recognized texts to. Can be either "clipboard", "websocket", or
;a path to a text file.
;write_to = clipboard
;note: this specifies an amount of seconds to wait for auto pausing the program after a successful text recognition. Will be ignored when reading with screen capture. 0 to disable.
;auto_pause = 0
;OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing",
;"gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr",
;"rapidocr", "ocrspace".
;engine =
;OCR engine to use for two-pass processing.
;engine_secondary =
;Pause at startup.
;pause_at_startup = False
;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
;engine_color = cyan
;websocket_port = 7331
;delay_secs = 0.5
;notifications = False
;ignore_flag = False
;Automatically pause the program after the specified number of seconds since
;the last successful text recognition. Will be ignored when reading with screen
;capture. 0 to disable.
;auto_pause = 0
;Delete image files after processing when reading from a directory.
;delete_images = False
;note: terminal window verbosity. Can be -2 (all recognized text is showed whole), -1 (only timestamps are shown), 0 (nothing is shown but errors), or larger than 0 to cut displayed text to that amount of characters.
;verbosity = -2
;note: this specifies a combo to wait on for pausing the program. As an example: <ctrl>+<shift>+p. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;combo_pause = <ctrl>+<shift>+p
;note: this specifies a combo to wait on for switching the OCR engine. As an example: <ctrl>+<shift>+a. To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;combo_engine_switch = <ctrl>+<shift>+a
;note: screen_capture_area can be empty for the coordinate picker, "screen_N" (where N is the screen number starting from 1) for an entire screen, have a manual set of coordinates (x,y,width,height) or a window name (the first matching window title will be used).
;screen_capture_area =
;Available:
;avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
;engine_color = cyan
;delay_secs = 0.5
;websocket_port = 7331
;Show an operating system notification with the detected text. Will be ignored
;when reading with screen capture and periodic screenshots.
;notifications = False
;Combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p".
;The list of keys can be found here:
;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;combo_pause =
;Combo to wait on for switching the OCR engine. As an example:
;"<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found
;here:
;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;combo_engine_switch =
;Area to target when reading with screen capture. Can be either empty
;(automatic selector), a set of coordinates (x,y,width,height), "screen_N"
;(captures a whole screen, where N is the screen number starting from 1) or a
;window name (the first matching window title will be used).
;screen_capture_area = screen_1
;screen_capture_area = 400,200,1500,600
;screen_capture_area = OBS
;note: if screen_capture_area is a window name, this can be changed to capture inactive windows too.
;screen_capture_only_active_windows = True
;note: delay (in seconds) between screenshots when reading with screen capture. -1 to disable periodic screenshots.
;screen_capture_area =
;When reading with screen capture, subsection of the selected window to
;target. Can be either empty (automatic selector), a set of coordinates
;(x,y,width,height), or "window" to use the whole window.
;screen_capture_window_area = window
;Delay (in seconds) between screenshots when reading with screen capture. -1
;to disable periodic screenshots.
;screen_capture_delay_secs = 0
;note: when reading with screen capture, delay to wait until text is stable before processing it. -1 waits for two OCR results to be the same. 0 to disable.
;When reading with screen capture and screen_capture_area is a window name,
;only target the window while it's active.
;screen_capture_only_active_windows = True
;When reading with screen capture, delay to wait until text is stable before
;processing it. -1 waits for two OCR results to be the same. 0 to disable.
;screen_capture_frame_stabilization = -1
;note: this specifies a combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;screen_capture_combo = <ctrl>+<shift>+s
;When reading with screen capture and frame stabilization is on, try to
;recover missed lines from unstable frames. Can lead to increased glitches.
;screen_capture_line_recovery = True
;When reading with screen capture, try to filter furigana lines.
;screen_capture_furigana_filter = True
;When reading with screen capture, combo to wait on for taking a screenshot.
;If periodic screenshots are also enabled, any screenshot taken this way
;bypasses the filtering. Example value: "<ctrl>+<shift>+s". The list of keys
;can be found here:
;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;screen_capture_combo =
;When reading with screen capture, combo to wait on for invoking the
;coordinate picker to change the screen/window area. Example value:
;"<ctrl>+<shift>+c". The list of keys can be found here:
;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;coordinate_selector_combo =
;screen_capture_old_macos_api = False
;note: this specifies the language to use for some engines and text filtering while using "screencapture". Valid values: ja: (Japanese) zh: (Chinese) ko: (Korean) ar: (Arabic) ru: (Russian) el: (Greek) he: (Hebrew) th: (Thai)
;Any other value will use Latin Extended (for most European languages and English).
;Two-letter language code for filtering screencapture OCR results. E.g. "ja"
;for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for
;Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will
;use Latin Extended (for most European languages and English).
;language = ja
;note: can be "text" (default) or "json" (to include coordinates).
;The output format for OCR results. Can be "text" (default) or "json" (to
;include coordinates).
;output_format = text
;Terminal window verbosity. Can be -2 (all recognized text is shown whole,
;default), -1 (only timestamps are shown), 0 (nothing is shown but errors), or
;larger than 0 to cut displayed text to that number of characters.
;verbosity = -2
;[winrtocr]
;url = http://aaa.xxx.yyy.zzz:8000
;[oneocr]
;url = http://aaa.xxx.yyy.zzz:8001
;[azure]
;api_key = api_key_here
;endpoint = https://YOURPROJECT.cognitiveservices.azure.com/
;[mangaocr]
;pretrained_model_name_or_path = kha-white/manga-ocr-base
;force_cpu = False
;[easyocr]
;gpu = True
;[ocrspace]
;api_key = api_key_here
;engine_version = 2
;[rapidocr]
;high_accuracy_detection = False
;high_accuracy_recognition = True