Get rid of fire dependency

This commit is contained in:
AuroraWright
2025-04-10 12:59:35 +02:00
parent 77007611cf
commit b8fda7d4a4
5 changed files with 81 additions and 92 deletions

View File

@@ -1,3 +1 @@
__version__ = '0.1.10'
from owocr.ocr import * from owocr.ocr import *

View File

@@ -1,22 +1,7 @@
import fire from .run import run
import inspect
from owocr.run import run, init_config
def main(): def main():
init_config() run()
from owocr.run import config
cli_args = inspect.getfullargspec(run)[0]
defaults = []
index = 0
for arg in cli_args:
defaults.append(config.get_general(arg))
index += 1
run.__defaults__ = tuple(defaults)
fire.Fire(run)
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -1,7 +1,43 @@
import os import os
import configparser import configparser
import argparse
import textwrap
import urllib.request import urllib.request
# Command-line interface definition.
#
# Every option uses ``default=argparse.SUPPRESS`` so that options the user did
# not pass are simply absent from the parsed namespace; the configuration code
# can then fall back to the value stored in the config file.
#
# Boolean options use ``argparse.BooleanOptionalAction`` rather than
# ``store_true``: ``-n`` / ``--notifications`` still yields ``True``, and the
# generated ``--no-notifications`` form yields ``False``. With ``store_true``
# a flag enabled in the config file could never be turned off from the
# command line, because the CLI could only ever produce ``True``.
parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
    Runs OCR in the background.
    It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
    Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
'''))

# Input / output selection.
parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
                    help='Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
                    help='Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
                    help='OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".')

# Boolean switches (each also accepts a ``--no-<name>`` form to force False).
parser.add_argument('-p', '--pause_at_startup', action=argparse.BooleanOptionalAction, default=argparse.SUPPRESS,
                    help='Pause at startup.')
parser.add_argument('-i', '--ignore_flag', action=argparse.BooleanOptionalAction, default=argparse.SUPPRESS,
                    help='Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).')
parser.add_argument('-d', '--delete_images', action=argparse.BooleanOptionalAction, default=argparse.SUPPRESS,
                    help='Delete image files after processing when reading from a directory.')
parser.add_argument('-n', '--notifications', action=argparse.BooleanOptionalAction, default=argparse.SUPPRESS,
                    help='Show an operating system notification with the detected text.')

# Pausing and keyboard combos.
parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS,
                    help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.')
parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS,
                    help='Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.SUPPRESS,
                    help='Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')

# Screen capture options.
parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS,
                    help='Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
parser.add_argument('-sd', '--screen_capture_delay_secs', type=float, default=argparse.SUPPRESS,
                    help='Specifies the delay (in seconds) between screenshots when reading with screen capture.')
parser.add_argument('-sw', '--screen_capture_only_active_windows', action=argparse.BooleanOptionalAction, default=argparse.SUPPRESS,
                    help="When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.")
parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
                    help='When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
class Config: class Config:
has_config = False has_config = False
@@ -50,6 +86,8 @@ class Config:
return value return value
def __init__(self): def __init__(self):
args = parser.parse_args()
self.__provided_cli_args = vars(args)
config = configparser.ConfigParser() config = configparser.ConfigParser()
res = config.read(self.config_path) res = config.read(self.config_path)
@@ -74,6 +112,8 @@ class Config:
self.__engine_config[key.lower()][sub_key.lower()] = self.__parse(config[key][sub_key]) self.__engine_config[key.lower()][sub_key.lower()] = self.__parse(config[key][sub_key])
def get_general(self, value): def get_general(self, value):
if self.__provided_cli_args.get(value, None) is not None:
return self.__provided_cli_args[value]
try: try:
return self.__general_config[value] return self.__general_config[value]
except KeyError: except KeyError:
@@ -86,4 +126,6 @@ class Config:
try: try:
return self.__engine_config[value] return self.__engine_config[value]
except KeyError: except KeyError:
return None return None
config = Config()

View File

@@ -3,28 +3,27 @@ import signal
import time import time
import threading import threading
from pathlib import Path from pathlib import Path
import fire
import numpy as np
import pyperclipfix
import mss
import asyncio
import websockets
import socketserver
import queue import queue
import io import io
import re import re
import inspect
import numpy as np
import pyperclipfix
import mss
import psutil
import asyncio
import websockets
import socketserver
from PIL import Image from PIL import Image
from PIL import UnidentifiedImageError from PIL import UnidentifiedImageError
from loguru import logger from loguru import logger
from pynput import keyboard from pynput import keyboard
from desktop_notifier import DesktopNotifierSync from desktop_notifier import DesktopNotifierSync
import psutil
import inspect
from .ocr import * from .ocr import *
from .config import Config from .config import config
from .screen_coordinate_picker import get_screen_selection from .screen_coordinate_picker import get_screen_selection
try: try:
@@ -51,9 +50,6 @@ except ImportError:
pass pass
config = None
class WindowsClipboardThread(threading.Thread): class WindowsClipboardThread(threading.Thread):
def __init__(self): def __init__(self):
super().__init__(daemon=True) super().__init__(daemon=True)
@@ -533,7 +529,7 @@ def are_images_identical(img1, img2):
return (img1.shape == img2.shape) and (img1 == img2).all() return (img1.shape == img2.shape) and (img1 == img2).all()
def process_and_write_results(img_or_path, write_to, notifications, last_result, filtering): def process_and_write_results(img_or_path, last_result, filtering):
if auto_pause_handler: if auto_pause_handler:
auto_pause_handler.stop() auto_pause_handler.stop()
@@ -549,9 +545,10 @@ def process_and_write_results(img_or_path, write_to, notifications, last_result,
text, orig_text = filtering(text, last_result) text, orig_text = filtering(text, last_result)
text = post_process(text) text = post_process(text)
logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}') logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
if notifications: if config.get_general('notifications'):
notifier.send(title='owocr', message='Text recognized: ' + text) notifier.send(title='owocr', message='Text recognized: ' + text)
write_to = config.get_general('write_to')
if write_to == 'websocket': if write_to == 'websocket':
websocket_server_thread.send_text(text) websocket_server_thread.send_text(text)
elif write_to == 'clipboard': elif write_to == 'clipboard':
@@ -572,50 +569,7 @@ def get_path_key(path):
return path, path.lstat().st_mtime return path, path.lstat().st_mtime
def init_config(): def run():
global config
config = Config()
def run(read_from=None,
write_to=None,
engine=None,
pause_at_startup=None,
ignore_flag=None,
delete_images=None,
notifications=None,
auto_pause=None,
combo_pause=None,
combo_engine_switch=None,
screen_capture_area=None,
screen_capture_delay_secs=None,
screen_capture_only_active_windows=None,
screen_capture_combo=None
):
"""
Japanese OCR client
Runs OCR in the background.
It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
:param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
:param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
:param delay_secs: How often to check for new images, in seconds.
:param engine: OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".
:param pause_at_startup: Pause at startup.
:param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
:param delete_images: Delete image files after processing when reading from a directory.
:param notifications: Show an operating system notification with the detected text.
:param auto_pause: Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.
:param combo_pause: Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
:param combo_engine_switch: Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
:param screen_capture_area: Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).
:param screen_capture_delay_secs: Specifies the delay (in seconds) between screenshots when reading with screen capture.
:param screen_capture_only_active_windows: When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.
:param screen_capture_combo: When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
"""
logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}]) logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
if config.has_config: if config.has_config:
@@ -646,7 +600,7 @@ def run(read_from=None,
if engine_instance.available: if engine_instance.available:
engine_instances.append(engine_instance) engine_instances.append(engine_instance)
engine_keys.append(engine_class.key) engine_keys.append(engine_class.key)
if engine == engine_class.name: if config.get_general('engine') == engine_class.name:
default_engine = engine_class.key default_engine = engine_class.key
if len(engine_keys) == 0: if len(engine_keys) == 0:
@@ -657,18 +611,20 @@ def run(read_from=None,
global terminated global terminated
global paused global paused
global just_unpaused global just_unpaused
global first_pressed
global notifier global notifier
global auto_pause_handler global auto_pause_handler
read_from = config.get_general('read_from')
write_to = config.get_general('write_to')
terminated = False terminated = False
paused = pause_at_startup paused = config.get_general('pause_at_startup')
just_unpaused = True just_unpaused = True
first_pressed = None auto_pause = config.get_general('auto_pause')
auto_pause_handler = None auto_pause_handler = None
engine_index = engine_keys.index(default_engine) if default_engine != '' else 0 engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
engine_color = config.get_general('engine_color') engine_color = config.get_general('engine_color')
delay_secs = config.get_general('delay_secs') delay_secs = config.get_general('delay_secs')
screen_capture_on_combo = False combo_pause = config.get_general('combo_pause')
combo_engine_switch = config.get_general('combo_engine_switch')
notifier = DesktopNotifierSync() notifier = DesktopNotifierSync()
key_combos = {} key_combos = {}
@@ -706,6 +662,7 @@ def run(read_from=None,
unix_socket_server_thread.start() unix_socket_server_thread.start()
read_from_readable = 'unix socket' read_from_readable = 'unix socket'
elif read_from == 'clipboard': elif read_from == 'clipboard':
ignore_flag = config.get_general('ignore_flag')
macos_clipboard_polling = False macos_clipboard_polling = False
windows_clipboard_polling = False windows_clipboard_polling = False
img = None img = None
@@ -726,11 +683,16 @@ def run(read_from=None,
read_from_readable = 'clipboard' read_from_readable = 'clipboard'
elif read_from == 'screencapture': elif read_from == 'screencapture':
screen_capture_area = config.get_general('screen_capture_area')
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
screen_capture_combo = config.get_general('screen_capture_combo')
if screen_capture_combo != '': if screen_capture_combo != '':
screen_capture_on_combo = True screen_capture_on_combo = True
global screenshot_event global screenshot_event
screenshot_event = threading.Event() screenshot_event = threading.Event()
key_combos[screen_capture_combo] = on_screenshot_combo key_combos[screen_capture_combo] = on_screenshot_combo
else:
screen_capture_on_combo = False
if type(screen_capture_area) == tuple: if type(screen_capture_area) == tuple:
screen_capture_area = ','.join(map(str, screen_capture_area)) screen_capture_area = ','.join(map(str, screen_capture_area))
global screencapture_window_active global screencapture_window_active
@@ -786,6 +748,7 @@ def run(read_from=None,
sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height} sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}') logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
else: else:
screen_capture_only_active_windows = config.get_general('screen_capture_only_active_windows')
area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name' area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name'
if sys.platform == 'darwin': if sys.platform == 'darwin':
if int(platform.mac_ver()[0].split('.')[0]) < 14: if int(platform.mac_ver()[0].split('.')[0]) < 14:
@@ -844,6 +807,8 @@ def run(read_from=None,
filtering = TextFiltering() filtering = TextFiltering()
read_from_readable = 'screen capture' read_from_readable = 'screen capture'
else: else:
delete_images = config.get_general('delete_images')
read_from = Path(read_from) read_from = Path(read_from)
if not read_from.is_dir(): if not read_from.is_dir():
raise ValueError('read_from must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory') raise ValueError('read_from must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
@@ -882,7 +847,7 @@ def run(read_from=None,
else: else:
if not paused: if not paused:
img = Image.open(io.BytesIO(item)) img = Image.open(io.BytesIO(item))
process_and_write_results(img, write_to, notifications, None, None) process_and_write_results(img, None, None)
elif read_from == 'unixsocket': elif read_from == 'unixsocket':
while True: while True:
try: try:
@@ -892,7 +857,7 @@ def run(read_from=None,
else: else:
if not paused: if not paused:
img = Image.open(io.BytesIO(item)) img = Image.open(io.BytesIO(item))
process_and_write_results(img, write_to, notifications, None, None) process_and_write_results(img, None, None)
elif read_from == 'clipboard': elif read_from == 'clipboard':
process_clipboard = False process_clipboard = False
if windows_clipboard_polling: if windows_clipboard_polling:
@@ -940,7 +905,7 @@ def run(read_from=None,
process_clipboard = True process_clipboard = True
if process_clipboard: if process_clipboard:
process_and_write_results(img, write_to, notifications, None, None) process_and_write_results(img, None, None)
just_unpaused = False just_unpaused = False
@@ -1004,7 +969,7 @@ def run(read_from=None,
else: else:
sct_img = sct.grab(sct_params) sct_img = sct.grab(sct_params)
img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX') img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
res = process_and_write_results(img, write_to, notifications, last_result, filtering) res = process_and_write_results(img, last_result, filtering)
if res: if res:
last_result = (res, engine_index) last_result = (res, engine_index)
delay = screen_capture_delay_secs delay = screen_capture_delay_secs
@@ -1027,7 +992,7 @@ def run(read_from=None,
except (UnidentifiedImageError, OSError) as e: except (UnidentifiedImageError, OSError) as e:
logger.warning(f'Error while reading file {path}: {e}') logger.warning(f'Error while reading file {path}: {e}')
else: else:
process_and_write_results(img, write_to, notifications, None, None) process_and_write_results(img, None, None)
img.close() img.close()
if delete_images: if delete_images:
Path.unlink(path) Path.unlink(path)

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "owocr" name = "owocr"
version = "1.13.10" version = "1.13.11"
description = "Japanese OCR" description = "Japanese OCR"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"
@@ -16,7 +16,6 @@ classifiers = [
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
] ]
dependencies = [ dependencies = [
"fire",
"jaconv", "jaconv",
"loguru", "loguru",
"numpy", "numpy",