Get rid of fire dependency

AuroraWright
2025-04-10 12:59:35 +02:00
parent 77007611cf
commit b8fda7d4a4
5 changed files with 81 additions and 92 deletions
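
For context, the dependency being dropped here, python-fire, builds a command-line interface directly from a function's signature; that is what the fire.Fire(run) call removed below was doing. A minimal, generic illustration of that behaviour (not the project's code):

# Generic illustration of the dropped dependency: python-fire derives a CLI
# from a function's parameters, so --name=... and --shout map onto the
# keyword arguments automatically and the return value is printed.
import fire

def greet(name='world', shout=False):
    msg = f'hello {name}'
    return msg.upper() if shout else msg

if __name__ == '__main__':
    fire.Fire(greet)  # e.g. `python greet.py --name=owocr --shout` prints HELLO OWOCR

The commit replaces this auto-generated CLI with an explicit argparse parser in the config module, so run() no longer needs a CLI-shaped signature at all.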

View File

@@ -1,3 +1 @@
__version__ = '0.1.10'
from owocr.ocr import *

View File

@@ -1,22 +1,7 @@
import fire
import inspect
from owocr.run import run, init_config
from .run import run
def main():
init_config()
from owocr.run import config
cli_args = inspect.getfullargspec(run)[0]
defaults = []
index = 0
for arg in cli_args:
defaults.append(config.get_general(arg))
index += 1
run.__defaults__ = tuple(defaults)
fire.Fire(run)
run()
if __name__ == '__main__':
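
The removed main() had to copy config-file values into run.__defaults__ so that fire would surface them as CLI defaults. A simplified reconstruction of that pattern, with a plain dict standing in for config.get_general (illustrative only, not the original code):

# Simplified reconstruction of the deleted defaults-injection trick: fire reads
# defaults straight from the function object, so config-file values had to be
# patched into run.__defaults__ before fire.Fire(run) was called.
import inspect

def run(read_from=None, write_to=None):
    print(read_from, write_to)

file_config = {'read_from': 'clipboard', 'write_to': 'websocket'}  # stand-in for config.get_general()

run.__defaults__ = tuple(file_config.get(arg) for arg in inspect.getfullargspec(run).args)
run()  # -> clipboard websocket

The new main() is just run(), because the CLI/config precedence now lives inside Config.get_general (see the config changes below).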

View File

@@ -1,7 +1,43 @@
import os
import configparser
import argparse
import textwrap
import urllib.request
parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
Runs OCR in the background.
It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
'''))
parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
help='Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
help='Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
help='OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".')
parser.add_argument('-p', '--pause_at_startup', action='store_true', default=argparse.SUPPRESS,
help='Pause at startup.')
parser.add_argument('-i', '--ignore_flag', action='store_true', default=argparse.SUPPRESS,
help='Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).')
parser.add_argument('-d', '--delete_images', action='store_true', default=argparse.SUPPRESS,
help='Delete image files after processing when reading from a directory.')
parser.add_argument('-n', '--notifications', action='store_true', default=argparse.SUPPRESS,
help='Show an operating system notification with the detected text.')
parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS,
help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.')
parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS,
help='Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.SUPPRESS,
help='Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS,
help='Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
parser.add_argument('-sd', '--screen_capture_delay_secs', type=float, default=argparse.SUPPRESS,
help='Specifies the delay (in seconds) between screenshots when reading with screen capture.')
parser.add_argument('-sw', '--screen_capture_only_active_windows', action='store_true', default=argparse.SUPPRESS,
help="When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.")
parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
help='When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
class Config:
has_config = False
@@ -50,6 +86,8 @@ class Config:
return value
def __init__(self):
args = parser.parse_args()
self.__provided_cli_args = vars(args)
config = configparser.ConfigParser()
res = config.read(self.config_path)
@@ -74,6 +112,8 @@ class Config:
self.__engine_config[key.lower()][sub_key.lower()] = self.__parse(config[key][sub_key])
def get_general(self, value):
if self.__provided_cli_args.get(value, None) is not None:
return self.__provided_cli_args[value]
try:
return self.__general_config[value]
except KeyError:
@@ -87,3 +127,5 @@ class Config:
return self.__engine_config[value]
except KeyError:
return None
config = Config()
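
The key to the new parser is default=argparse.SUPPRESS: options that are not passed on the command line never appear in the parsed namespace, so get_general can prefer an explicit CLI value and otherwise fall back to the config file. A minimal sketch of that precedence, with a simplified Config and behaviour inferred from the hunks above:

# Minimal sketch of the CLI-over-config-file precedence that argparse.SUPPRESS
# enables (simplified; not the full Config class).
import argparse

parser = argparse.ArgumentParser(prog='owocr')
parser.add_argument('-e', '--engine', default=argparse.SUPPRESS)

class MiniConfig:
    def __init__(self, argv, file_values):
        self._cli = vars(parser.parse_args(argv))  # holds only flags actually passed
        self._file = file_values                   # stand-in for the configparser data

    def get_general(self, key):
        if self._cli.get(key) is not None:
            return self._cli[key]
        return self._file.get(key)

print(MiniConfig([], {'engine': 'mangaocr'}).get_general('engine'))               # mangaocr
print(MiniConfig(['-e', 'glens'], {'engine': 'mangaocr'}).get_general('engine'))  # glens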

View File

@@ -3,28 +3,27 @@ import signal
import time
import threading
from pathlib import Path
import fire
import numpy as np
import pyperclipfix
import mss
import asyncio
import websockets
import socketserver
import queue
import io
import re
import inspect
import numpy as np
import pyperclipfix
import mss
import psutil
import asyncio
import websockets
import socketserver
from PIL import Image
from PIL import UnidentifiedImageError
from loguru import logger
from pynput import keyboard
from desktop_notifier import DesktopNotifierSync
import psutil
import inspect
from .ocr import *
from .config import Config
from .config import config
from .screen_coordinate_picker import get_screen_selection
try:
@@ -51,9 +50,6 @@ except ImportError:
pass
config = None
class WindowsClipboardThread(threading.Thread):
def __init__(self):
super().__init__(daemon=True)
@@ -533,7 +529,7 @@ def are_images_identical(img1, img2):
return (img1.shape == img2.shape) and (img1 == img2).all()
def process_and_write_results(img_or_path, write_to, notifications, last_result, filtering):
def process_and_write_results(img_or_path, last_result, filtering):
if auto_pause_handler:
auto_pause_handler.stop()
@@ -549,9 +545,10 @@ def process_and_write_results(img_or_path, write_to, notifications, last_result,
text, orig_text = filtering(text, last_result)
text = post_process(text)
logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
if notifications:
if config.get_general('notifications'):
notifier.send(title='owocr', message='Text recognized: ' + text)
write_to = config.get_general('write_to')
if write_to == 'websocket':
websocket_server_thread.send_text(text)
elif write_to == 'clipboard':
@@ -572,50 +569,7 @@ def get_path_key(path):
return path, path.lstat().st_mtime
def init_config():
global config
config = Config()
def run(read_from=None,
write_to=None,
engine=None,
pause_at_startup=None,
ignore_flag=None,
delete_images=None,
notifications=None,
auto_pause=None,
combo_pause=None,
combo_engine_switch=None,
screen_capture_area=None,
screen_capture_delay_secs=None,
screen_capture_only_active_windows=None,
screen_capture_combo=None
):
"""
Japanese OCR client
Runs OCR in the background.
It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
:param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
:param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
:param delay_secs: How often to check for new images, in seconds.
:param engine: OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".
:param pause_at_startup: Pause at startup.
:param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
:param delete_images: Delete image files after processing when reading from a directory.
:param notifications: Show an operating system notification with the detected text.
:param auto_pause: Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.
:param combo_pause: Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
:param combo_engine_switch: Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
:param screen_capture_area: Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).
:param screen_capture_delay_secs: Specifies the delay (in seconds) between screenshots when reading with screen capture.
:param screen_capture_only_active_windows: When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.
:param screen_capture_combo: When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
"""
def run():
logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
if config.has_config:
@@ -646,7 +600,7 @@ def run(read_from=None,
if engine_instance.available:
engine_instances.append(engine_instance)
engine_keys.append(engine_class.key)
if engine == engine_class.name:
if config.get_general('engine') == engine_class.name:
default_engine = engine_class.key
if len(engine_keys) == 0:
@@ -657,18 +611,20 @@ def run(read_from=None,
global terminated
global paused
global just_unpaused
global first_pressed
global notifier
global auto_pause_handler
read_from = config.get_general('read_from')
write_to = config.get_general('write_to')
terminated = False
paused = pause_at_startup
paused = config.get_general('pause_at_startup')
just_unpaused = True
first_pressed = None
auto_pause = config.get_general('auto_pause')
auto_pause_handler = None
engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
engine_color = config.get_general('engine_color')
delay_secs = config.get_general('delay_secs')
screen_capture_on_combo = False
combo_pause = config.get_general('combo_pause')
combo_engine_switch = config.get_general('combo_engine_switch')
notifier = DesktopNotifierSync()
key_combos = {}
@@ -706,6 +662,7 @@ def run(read_from=None,
unix_socket_server_thread.start()
read_from_readable = 'unix socket'
elif read_from == 'clipboard':
ignore_flag = config.get_general('ignore_flag')
macos_clipboard_polling = False
windows_clipboard_polling = False
img = None
@@ -726,11 +683,16 @@ def run(read_from=None,
read_from_readable = 'clipboard'
elif read_from == 'screencapture':
screen_capture_area = config.get_general('screen_capture_area')
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
screen_capture_combo = config.get_general('screen_capture_combo')
if screen_capture_combo != '':
screen_capture_on_combo = True
global screenshot_event
screenshot_event = threading.Event()
key_combos[screen_capture_combo] = on_screenshot_combo
else:
screen_capture_on_combo = False
if type(screen_capture_area) == tuple:
screen_capture_area = ','.join(map(str, screen_capture_area))
global screencapture_window_active
@@ -786,6 +748,7 @@ def run(read_from=None,
sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
else:
screen_capture_only_active_windows = config.get_general('screen_capture_only_active_windows')
area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name'
if sys.platform == 'darwin':
if int(platform.mac_ver()[0].split('.')[0]) < 14:
@@ -844,6 +807,8 @@ def run(read_from=None,
filtering = TextFiltering()
read_from_readable = 'screen capture'
else:
delete_images = config.get_general('delete_images')
read_from = Path(read_from)
if not read_from.is_dir():
raise ValueError('read_from must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
@@ -882,7 +847,7 @@ def run(read_from=None,
else:
if not paused:
img = Image.open(io.BytesIO(item))
process_and_write_results(img, write_to, notifications, None, None)
process_and_write_results(img, None, None)
elif read_from == 'unixsocket':
while True:
try:
@@ -892,7 +857,7 @@ def run(read_from=None,
else:
if not paused:
img = Image.open(io.BytesIO(item))
process_and_write_results(img, write_to, notifications, None, None)
process_and_write_results(img, None, None)
elif read_from == 'clipboard':
process_clipboard = False
if windows_clipboard_polling:
@@ -940,7 +905,7 @@ def run(read_from=None,
process_clipboard = True
if process_clipboard:
process_and_write_results(img, write_to, notifications, None, None)
process_and_write_results(img, None, None)
just_unpaused = False
@@ -1004,7 +969,7 @@ def run(read_from=None,
else:
sct_img = sct.grab(sct_params)
img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
res = process_and_write_results(img, write_to, notifications, last_result, filtering)
res = process_and_write_results(img, last_result, filtering)
if res:
last_result = (res, engine_index)
delay = screen_capture_delay_secs
@@ -1027,7 +992,7 @@ def run(read_from=None,
except (UnidentifiedImageError, OSError) as e:
logger.warning(f'Error while reading file {path}: {e}')
else:
process_and_write_results(img, write_to, notifications, None, None)
process_and_write_results(img, None, None)
img.close()
if delete_images:
Path.unlink(path)
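
Net effect on the public surface: run() no longer accepts keyword arguments and init_config() is gone; every option is fetched through the module-level config at the point of use, and process_and_write_results drops its write_to/notifications parameters for the same reason. Assumed programmatic usage after this change:

# Assumed usage after this commit: options come from CLI flags or the config
# file rather than from keyword arguments to run().
from owocr.run import run

run()  # read_from, write_to, engine, ... are resolved via config.get_general()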

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "owocr"
version = "1.13.10"
version = "1.13.11"
description = "Japanese OCR"
readme = "README.md"
requires-python = ">=3.11"
@@ -16,7 +16,6 @@ classifiers = [
"Programming Language :: Python :: 3",
]
dependencies = [
"fire",
"jaconv",
"loguru",
"numpy",