From b8fda7d4a4a8379dc1492443b70b24e1c695b022 Mon Sep 17 00:00:00 2001 From: AuroraWright Date: Thu, 10 Apr 2025 12:59:35 +0200 Subject: [PATCH] Get rid of fire dependency --- owocr/__init__.py | 2 - owocr/__main__.py | 19 +-------- owocr/config.py | 44 ++++++++++++++++++- owocr/run.py | 105 ++++++++++++++++------------------------------ pyproject.toml | 3 +- 5 files changed, 81 insertions(+), 92 deletions(-) diff --git a/owocr/__init__.py b/owocr/__init__.py index 657df77..2bcd0af 100644 --- a/owocr/__init__.py +++ b/owocr/__init__.py @@ -1,3 +1 @@ -__version__ = '0.1.10' - from owocr.ocr import * diff --git a/owocr/__main__.py b/owocr/__main__.py index c226d55..1a2968c 100644 --- a/owocr/__main__.py +++ b/owocr/__main__.py @@ -1,22 +1,7 @@ -import fire -import inspect -from owocr.run import run, init_config +from .run import run def main(): - init_config() - - from owocr.run import config - cli_args = inspect.getfullargspec(run)[0] - defaults = [] - - index = 0 - for arg in cli_args: - defaults.append(config.get_general(arg)) - index += 1 - - run.__defaults__ = tuple(defaults) - - fire.Fire(run) + run() if __name__ == '__main__': diff --git a/owocr/config.py b/owocr/config.py index 8b14ab7..4a8323e 100644 --- a/owocr/config.py +++ b/owocr/config.py @@ -1,7 +1,43 @@ import os import configparser +import argparse +import textwrap import urllib.request +parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\ + Runs OCR in the background. + It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window. + Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket. +''')) + +parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS, + help='Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.') +parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS, + help='Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.') +parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS, + help='OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".') +parser.add_argument('-p', '--pause_at_startup', action='store_true', default=argparse.SUPPRESS, + help='Pause at startup.') +parser.add_argument('-i', '--ignore_flag', action='store_true', default=argparse.SUPPRESS, + help='Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).') +parser.add_argument('-d', '--delete_images', action='store_true', default=argparse.SUPPRESS, + help='Delete image files after processing when reading from a directory.') +parser.add_argument('-n', '--notifications', action='store_true', default=argparse.SUPPRESS, + help='Show an operating system notification with the detected text.') +parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS, + help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.') +parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS, + help='Specifies a combo to wait on for pausing the program. As an example: "++p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') +parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.SUPPRESS, + help='Specifies a combo to wait on for switching the OCR engine. As an example: "++a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') +parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS, + help='Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).') +parser.add_argument('-sd', '--screen_capture_delay_secs', type=float, default=argparse.SUPPRESS, + help='Specifies the delay (in seconds) between screenshots when reading with screen capture.') +parser.add_argument('-sw', '--screen_capture_only_active_windows', action='store_true', default=argparse.SUPPRESS, + help="When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.") +parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS, + help='When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "++s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') class Config: has_config = False @@ -50,6 +86,8 @@ class Config: return value def __init__(self): + args = parser.parse_args() + self.__provided_cli_args = vars(args) config = configparser.ConfigParser() res = config.read(self.config_path) @@ -74,6 +112,8 @@ class Config: self.__engine_config[key.lower()][sub_key.lower()] = self.__parse(config[key][sub_key]) def get_general(self, value): + if self.__provided_cli_args.get(value, None) is not None: + return self.__provided_cli_args[value] try: return self.__general_config[value] except KeyError: @@ -86,4 +126,6 @@ class Config: try: return self.__engine_config[value] except KeyError: - return None \ No newline at end of file + return None + +config = Config() diff --git a/owocr/run.py b/owocr/run.py index e456395..290a926 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -3,28 +3,27 @@ import signal import time import threading from pathlib import Path - -import fire -import numpy as np -import pyperclipfix -import mss -import asyncio -import websockets -import socketserver import queue import io import re +import inspect + +import numpy as np +import pyperclipfix +import mss +import psutil +import asyncio +import websockets +import socketserver from PIL import Image from PIL import UnidentifiedImageError from loguru import logger from pynput import keyboard from desktop_notifier import DesktopNotifierSync -import psutil -import inspect from .ocr import * -from .config import Config +from .config import config from .screen_coordinate_picker import get_screen_selection try: @@ -51,9 +50,6 @@ except ImportError: pass -config = None - - class WindowsClipboardThread(threading.Thread): def __init__(self): super().__init__(daemon=True) @@ -533,7 +529,7 @@ def are_images_identical(img1, img2): return (img1.shape == img2.shape) and (img1 == img2).all() -def process_and_write_results(img_or_path, write_to, notifications, last_result, filtering): +def process_and_write_results(img_or_path, last_result, filtering): if auto_pause_handler: auto_pause_handler.stop() @@ -549,9 +545,10 @@ def process_and_write_results(img_or_path, write_to, notifications, last_result, text, orig_text = filtering(text, last_result) text = post_process(text) logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}: {text}') - if notifications: + if config.get_general('notifications'): notifier.send(title='owocr', message='Text recognized: ' + text) + write_to = config.get_general('write_to') if write_to == 'websocket': websocket_server_thread.send_text(text) elif write_to == 'clipboard': @@ -572,50 +569,7 @@ def get_path_key(path): return path, path.lstat().st_mtime -def init_config(): - global config - config = Config() - - -def run(read_from=None, - write_to=None, - engine=None, - pause_at_startup=None, - ignore_flag=None, - delete_images=None, - notifications=None, - auto_pause=None, - combo_pause=None, - combo_engine_switch=None, - screen_capture_area=None, - screen_capture_delay_secs=None, - screen_capture_only_active_windows=None, - screen_capture_combo=None - ): - """ - Japanese OCR client - - Runs OCR in the background. - It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window. - Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket. - - :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory. - :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file. - :param delay_secs: How often to check for new images, in seconds. - :param engine: OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace". - :param pause_at_startup: Pause at startup. - :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string). - :param delete_images: Delete image files after processing when reading from a directory. - :param notifications: Show an operating system notification with the detected text. - :param auto_pause: Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable. - :param combo_pause: Specifies a combo to wait on for pausing the program. As an example: "++p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key - :param combo_engine_switch: Specifies a combo to wait on for switching the OCR engine. As an example: "++a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key - :param screen_capture_area: Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used). - :param screen_capture_delay_secs: Specifies the delay (in seconds) between screenshots when reading with screen capture. - :param screen_capture_only_active_windows: When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active. - :param screen_capture_combo: When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "++s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key - """ - +def run(): logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}]) if config.has_config: @@ -646,7 +600,7 @@ def run(read_from=None, if engine_instance.available: engine_instances.append(engine_instance) engine_keys.append(engine_class.key) - if engine == engine_class.name: + if config.get_general('engine') == engine_class.name: default_engine = engine_class.key if len(engine_keys) == 0: @@ -657,18 +611,20 @@ def run(read_from=None, global terminated global paused global just_unpaused - global first_pressed global notifier global auto_pause_handler + read_from = config.get_general('read_from') + write_to = config.get_general('write_to') terminated = False - paused = pause_at_startup + paused = config.get_general('pause_at_startup') just_unpaused = True - first_pressed = None + auto_pause = config.get_general('auto_pause') auto_pause_handler = None engine_index = engine_keys.index(default_engine) if default_engine != '' else 0 engine_color = config.get_general('engine_color') delay_secs = config.get_general('delay_secs') - screen_capture_on_combo = False + combo_pause = config.get_general('combo_pause') + combo_engine_switch = config.get_general('combo_engine_switch') notifier = DesktopNotifierSync() key_combos = {} @@ -706,6 +662,7 @@ def run(read_from=None, unix_socket_server_thread.start() read_from_readable = 'unix socket' elif read_from == 'clipboard': + ignore_flag = config.get_general('ignore_flag') macos_clipboard_polling = False windows_clipboard_polling = False img = None @@ -726,11 +683,16 @@ def run(read_from=None, read_from_readable = 'clipboard' elif read_from == 'screencapture': + screen_capture_area = config.get_general('screen_capture_area') + screen_capture_delay_secs = config.get_general('screen_capture_delay_secs') + screen_capture_combo = config.get_general('screen_capture_combo') if screen_capture_combo != '': screen_capture_on_combo = True global screenshot_event screenshot_event = threading.Event() key_combos[screen_capture_combo] = on_screenshot_combo + else: + screen_capture_on_combo = False if type(screen_capture_area) == tuple: screen_capture_area = ','.join(map(str, screen_capture_area)) global screencapture_window_active @@ -786,6 +748,7 @@ def run(read_from=None, sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height} logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}') else: + screen_capture_only_active_windows = config.get_general('screen_capture_only_active_windows') area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name' if sys.platform == 'darwin': if int(platform.mac_ver()[0].split('.')[0]) < 14: @@ -844,6 +807,8 @@ def run(read_from=None, filtering = TextFiltering() read_from_readable = 'screen capture' else: + delete_images = config.get_general('delete_images') + read_from = Path(read_from) if not read_from.is_dir(): raise ValueError('read_from must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory') @@ -882,7 +847,7 @@ def run(read_from=None, else: if not paused: img = Image.open(io.BytesIO(item)) - process_and_write_results(img, write_to, notifications, None, None) + process_and_write_results(img, None, None) elif read_from == 'unixsocket': while True: try: @@ -892,7 +857,7 @@ def run(read_from=None, else: if not paused: img = Image.open(io.BytesIO(item)) - process_and_write_results(img, write_to, notifications, None, None) + process_and_write_results(img, None, None) elif read_from == 'clipboard': process_clipboard = False if windows_clipboard_polling: @@ -940,7 +905,7 @@ def run(read_from=None, process_clipboard = True if process_clipboard: - process_and_write_results(img, write_to, notifications, None, None) + process_and_write_results(img, None, None) just_unpaused = False @@ -1004,7 +969,7 @@ def run(read_from=None, else: sct_img = sct.grab(sct_params) img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX') - res = process_and_write_results(img, write_to, notifications, last_result, filtering) + res = process_and_write_results(img, last_result, filtering) if res: last_result = (res, engine_index) delay = screen_capture_delay_secs @@ -1027,7 +992,7 @@ def run(read_from=None, except (UnidentifiedImageError, OSError) as e: logger.warning(f'Error while reading file {path}: {e}') else: - process_and_write_results(img, write_to, notifications, None, None) + process_and_write_results(img, None, None) img.close() if delete_images: Path.unlink(path) diff --git a/pyproject.toml b/pyproject.toml index 65fb13c..aa8cea5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "owocr" -version = "1.13.10" +version = "1.13.11" description = "Japanese OCR" readme = "README.md" requires-python = ">=3.11" @@ -16,7 +16,6 @@ classifiers = [ "Programming Language :: Python :: 3", ] dependencies = [ - "fire", "jaconv", "loguru", "numpy",