Get rid of fire dependency

2025-04-10 12:59:35 +02:00
parent 77007611cf
commit b8fda7d4a4
5 changed files with 81 additions and 92 deletions
--- a/owocr/__init__.py
+++ b/owocr/__init__.py
@@ -1,3 +1 @@
-__version__ = '0.1.10'
 from owocr.ocr import *
--- a/owocr/__main__.py
+++ b/owocr/__main__.py
@@ -1,22 +1,7 @@
-import fire
+from .run import run
-import inspect
-from owocr.run import run, init_config
 def main():
-    init_config()
+    run()
-    from owocr.run import config
-    cli_args = inspect.getfullargspec(run)[0]
-    defaults = []
-    index = 0
-    for arg in cli_args:
-        defaults.append(config.get_general(arg))
-        index += 1
-    run.__defaults__ = tuple(defaults)
-    fire.Fire(run)
 if __name__ == '__main__':
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -1,7 +1,43 @@
 import os
 import configparser
 import argparse
 import textwrap
 import urllib.request
 parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
    Runs OCR in the background.
    It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
    Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
 '''))
 parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
                    help='Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
 parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
                    help='Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
 parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
                    help='OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".')
 parser.add_argument('-p', '--pause_at_startup', action='store_true', default=argparse.SUPPRESS,
                    help='Pause at startup.')
 parser.add_argument('-i', '--ignore_flag', action='store_true', default=argparse.SUPPRESS,
                    help='Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).')
 parser.add_argument('-d', '--delete_images', action='store_true', default=argparse.SUPPRESS,
                    help='Delete image files after processing when reading from a directory.')
 parser.add_argument('-n', '--notifications', action='store_true', default=argparse.SUPPRESS,
                    help='Show an operating system notification with the detected text.')
 parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS,
                    help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.')
 parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS,
                    help='Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
 parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.SUPPRESS,
                    help='Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
 parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS,
                    help='Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
 parser.add_argument('-sd', '--screen_capture_delay_secs', type=float, default=argparse.SUPPRESS,
                    help='Specifies the delay (in seconds) between screenshots when reading with screen capture.')
 parser.add_argument('-sw', '--screen_capture_only_active_windows', action='store_true', default=argparse.SUPPRESS,
                    help="When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.")
 parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
                    help='When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
 class Config:
    has_config = False
@@ -50,6 +86,8 @@ class Config:
        return value
    def __init__(self):
+        args = parser.parse_args()
+        self.__provided_cli_args = vars(args)
        config = configparser.ConfigParser()
        res = config.read(self.config_path)
@@ -74,6 +112,8 @@ class Config:
                    self.__engine_config[key.lower()][sub_key.lower()] = self.__parse(config[key][sub_key])
    def get_general(self, value):
+        if self.__provided_cli_args.get(value, None) is not None:
+            return self.__provided_cli_args[value]
        try:
            return self.__general_config[value]
        except KeyError:
@@ -86,4 +126,6 @@ class Config:
        try:
            return self.__engine_config[value]
        except KeyError:
-            return None
+            return None
 config = Config()
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -3,28 +3,27 @@ import signal
 import time
 import threading
 from pathlib import Path
 import fire
 import numpy as np
 import pyperclipfix
 import mss
 import asyncio
 import websockets
 import socketserver
 import queue
 import io
 import re
 import inspect
 import numpy as np
 import pyperclipfix
 import mss
 import psutil
 import asyncio
 import websockets
 import socketserver
 from PIL import Image
 from PIL import UnidentifiedImageError
 from loguru import logger
 from pynput import keyboard
 from desktop_notifier import DesktopNotifierSync
 import psutil
 import inspect
 from .ocr import *
-from .config import Config
+from .config import config
 from .screen_coordinate_picker import get_screen_selection
 try:
@@ -51,9 +50,6 @@ except ImportError:
    pass
-config = None
 class WindowsClipboardThread(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)
@@ -533,7 +529,7 @@ def are_images_identical(img1, img2):
    return (img1.shape == img2.shape) and (img1 == img2).all()
-def process_and_write_results(img_or_path, write_to, notifications, last_result, filtering):
+def process_and_write_results(img_or_path, last_result, filtering):
    if auto_pause_handler:
        auto_pause_handler.stop()
@@ -549,9 +545,10 @@ def process_and_write_results(img_or_path, write_to, notifications, last_result,
            text, orig_text = filtering(text, last_result)
        text = post_process(text)
        logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
-        if notifications:
+        if config.get_general('notifications'):
            notifier.send(title='owocr', message='Text recognized: ' + text)
        write_to = config.get_general('write_to')
        if write_to == 'websocket':
            websocket_server_thread.send_text(text)
        elif write_to == 'clipboard':
@@ -572,50 +569,7 @@ def get_path_key(path):
    return path, path.lstat().st_mtime
-def init_config():
+def run():
-    global config
-    config = Config()
-def run(read_from=None,
-        write_to=None,
-        engine=None,
-        pause_at_startup=None,
-        ignore_flag=None,
-        delete_images=None,
-        notifications=None,
-        auto_pause=None,
-        combo_pause=None,
-        combo_engine_switch=None,
-        screen_capture_area=None,
-        screen_capture_delay_secs=None,
-        screen_capture_only_active_windows=None,
-        screen_capture_combo=None
-        ):
-    """
-    Japanese OCR client
-    Runs OCR in the background.
-    It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
-    Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
-    :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
-    :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
-    :param delay_secs: How often to check for new images, in seconds.
-    :param engine: OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".
-    :param pause_at_startup: Pause at startup.
-    :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
-    :param delete_images: Delete image files after processing when reading from a directory.
-    :param notifications: Show an operating system notification with the detected text.
-    :param auto_pause: Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.
-    :param combo_pause: Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
-    :param combo_engine_switch: Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
-    :param screen_capture_area: Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).
-    :param screen_capture_delay_secs: Specifies the delay (in seconds) between screenshots when reading with screen capture.
-    :param screen_capture_only_active_windows: When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.
-    :param screen_capture_combo: When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
-    """
    logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
    if config.has_config:
@@ -646,7 +600,7 @@ def run(read_from=None,
            if engine_instance.available:
                engine_instances.append(engine_instance)
                engine_keys.append(engine_class.key)
-                if engine == engine_class.name:
+                if config.get_general('engine') == engine_class.name:
                    default_engine = engine_class.key
    if len(engine_keys) == 0:
@@ -657,18 +611,20 @@ def run(read_from=None,
    global terminated
    global paused
    global just_unpaused
    global first_pressed
    global notifier
    global auto_pause_handler
    read_from = config.get_general('read_from')
    write_to = config.get_general('write_to')
    terminated = False
-    paused = pause_at_startup
+    paused = config.get_general('pause_at_startup')
    just_unpaused = True
-    first_pressed = None
+    auto_pause = config.get_general('auto_pause')
    auto_pause_handler = None
    engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
    engine_color = config.get_general('engine_color')
    delay_secs = config.get_general('delay_secs')
-    screen_capture_on_combo = False
+    combo_pause = config.get_general('combo_pause')
    combo_engine_switch = config.get_general('combo_engine_switch')
    notifier = DesktopNotifierSync()
    key_combos = {}
@@ -706,6 +662,7 @@ def run(read_from=None,
        unix_socket_server_thread.start()
        read_from_readable = 'unix socket'
    elif read_from == 'clipboard':
        ignore_flag = config.get_general('ignore_flag')
        macos_clipboard_polling = False
        windows_clipboard_polling = False
        img = None
@@ -726,11 +683,16 @@ def run(read_from=None,
        read_from_readable = 'clipboard'
    elif read_from == 'screencapture':
        screen_capture_area = config.get_general('screen_capture_area')
        screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
        screen_capture_combo = config.get_general('screen_capture_combo')
        if screen_capture_combo != '':
            screen_capture_on_combo = True
            global screenshot_event
            screenshot_event = threading.Event()
            key_combos[screen_capture_combo] = on_screenshot_combo
        else:
            screen_capture_on_combo = False
        if type(screen_capture_area) == tuple:
            screen_capture_area = ','.join(map(str, screen_capture_area))
        global screencapture_window_active
@@ -786,6 +748,7 @@ def run(read_from=None,
            sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
            logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
        else:
            screen_capture_only_active_windows = config.get_general('screen_capture_only_active_windows')
            area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name'
            if sys.platform == 'darwin':
                if int(platform.mac_ver()[0].split('.')[0]) < 14:
@@ -844,6 +807,8 @@ def run(read_from=None,
        filtering = TextFiltering()
        read_from_readable = 'screen capture'
    else:
        delete_images = config.get_general('delete_images')
        read_from = Path(read_from)
        if not read_from.is_dir():
            raise ValueError('read_from must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
@@ -882,7 +847,7 @@ def run(read_from=None,
                else:
                    if not paused:
                        img = Image.open(io.BytesIO(item))
-                        process_and_write_results(img, write_to, notifications, None, None)
+                        process_and_write_results(img, None, None)
        elif read_from == 'unixsocket':
            while True:
                try:
@@ -892,7 +857,7 @@ def run(read_from=None,
                else:
                    if not paused:
                        img = Image.open(io.BytesIO(item))
-                        process_and_write_results(img, write_to, notifications, None, None)
+                        process_and_write_results(img, None, None)
        elif read_from == 'clipboard':
            process_clipboard = False
            if windows_clipboard_polling:
@@ -940,7 +905,7 @@ def run(read_from=None,
                            process_clipboard = True
            if process_clipboard:
-                process_and_write_results(img, write_to, notifications, None, None)
+                process_and_write_results(img, None, None)
            just_unpaused = False
@@ -1004,7 +969,7 @@ def run(read_from=None,
                else:
                    sct_img = sct.grab(sct_params)
                    img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
-                res = process_and_write_results(img, write_to, notifications, last_result, filtering)
+                res = process_and_write_results(img, last_result, filtering)
                if res:
                    last_result = (res, engine_index)
                delay = screen_capture_delay_secs
@@ -1027,7 +992,7 @@ def run(read_from=None,
                            except (UnidentifiedImageError, OSError) as e:
                                logger.warning(f'Error while reading file {path}: {e}')
                            else:
-                                process_and_write_results(img, write_to, notifications, None, None)
+                                process_and_write_results(img, None, None)
                                img.close()
                                if delete_images:
                                    Path.unlink(path)
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "owocr"
-version = "1.13.10"
+version = "1.13.11"
 description = "Japanese OCR"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -16,7 +16,6 @@ classifiers = [
    "Programming Language :: Python :: 3",
 ]
 dependencies = [
-    "fire",
    "jaconv",
    "loguru",
    "numpy",
--- a/owocr/__init__.py
+++ b/owocr/__init__.py
@@ -1,3 +1 @@
-__version__ = '0.1.10'
-
 from owocr.ocr import *