New version, fix config file issues, fix Windows crashes in edge cases

This commit is contained in:
AuroraWright
2025-05-03 06:55:08 +02:00
parent 010f319672
commit d2483a4af1
4 changed files with 27 additions and 24 deletions

View File

@@ -7,13 +7,13 @@ import urllib.request
parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
Runs OCR in the background.
It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
Recognized text can be either saved to system clipboard, appended to a text file or sent via a websocket.
'''))
parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
help='Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
help='Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
help='OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".')
parser.add_argument('-p', '--pause_at_startup', action='store_true', default=argparse.SUPPRESS,
@@ -27,17 +27,17 @@ parser.add_argument('-n', '--notifications', action='store_true', default=argpar
parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS,
help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.')
parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS,
help='Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
help='Combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.SUPPRESS,
help='Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
help='Combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS,
help='Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
help='Area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
parser.add_argument('-sd', '--screen_capture_delay_secs', type=float, default=argparse.SUPPRESS,
help='Specifies the delay (in seconds) between screenshots when reading with screen capture.')
help='Delay (in seconds) between screenshots when reading with screen capture.')
parser.add_argument('-sw', '--screen_capture_only_active_windows', action='store_true', default=argparse.SUPPRESS,
help="When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.")
help="When reading with screen capture and screen_capture_area is a window name, only target the window while it's active.")
parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
help='When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
help='When reading with screen capture, combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
class Config:
has_config = False

View File

@@ -870,14 +870,17 @@ def run():
except pywintypes.error:
pass
time.sleep(0.1)
if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_DIB):
clipboard_text = ''
if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_UNICODETEXT):
clipboard_text = win32clipboard.GetClipboardData(win32clipboard.CF_UNICODETEXT)
if ignore_flag or clipboard_text != '*ocr_ignore*':
img = Image.open(io.BytesIO(win32clipboard.GetClipboardData(win32clipboard.CF_DIB)))
process_clipboard = True
win32clipboard.CloseClipboard()
try:
if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_DIB):
clipboard_text = ''
if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_UNICODETEXT):
clipboard_text = win32clipboard.GetClipboardData(win32clipboard.CF_UNICODETEXT)
if ignore_flag or clipboard_text != '*ocr_ignore*':
img = Image.open(io.BytesIO(win32clipboard.GetClipboardData(win32clipboard.CF_DIB)))
process_clipboard = True
win32clipboard.CloseClipboard()
except pywintypes.error:
pass
elif macos_clipboard_polling:
if not paused:
with objc.autorelease_pool():

View File

@@ -27,17 +27,17 @@
;screen_capture_delay_secs = 3
;note: this specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;screen_capture_combo = <ctrl>+<shift>+s
[winrtocr]
;[winrtocr]
;url = http://aaa.xxx.yyy.zzz:8000
[oneocr]
;[oneocr]
;url = http://aaa.xxx.yyy.zzz:8001
[azure]
;[azure]
;api_key = api_key_here
;endpoint = https://YOURPROJECT.cognitiveservices.azure.com/
[mangaocr]
;[mangaocr]
;pretrained_model_name_or_path = kha-white/manga-ocr-base
;force_cpu = False
[easyocr]
;[easyocr]
;gpu = True
[ocrspace]
;[ocrspace]
;api_key = api_key_here

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "owocr"
version = "1.13.11"
version = "1.13.12"
description = "Japanese OCR"
readme = "README.md"
requires-python = ">=3.11"