diff --git a/README.md b/README.md index c09ce64..5210897 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Additionally: - You can switch between OCR providers pressing their corresponding keyboard key inside the terminal window (refer to the list of keys in the providers list below) - You can start the script paused with the `-p` option or with a specific provider with the `-e` option (refer to `owocr -h` for the list) - Holding ctrl or cmd at any time will pause image processing temporarily, or you can specify keyboard combos in the config file to pause/unpause and switch the OCR provider from anywhere (refer to the config file or `owocr -h`) +- You can auto pause the script after a successful text recognition with the `-a=seconds` option if you're not using screen capture. 0 (the default) disables it. - You can enable notifications in the config file or with `-n` to show the text with a native OS notification. **Important for macOS users:** if you use Python from brew, you need to enter this command in your terminal before the first notification: `codesign -f -s - $(brew --cellar python)/3.*/Frameworks/Python.framework` (works on Ventura/Sonoma). Older macOS versions might require Python to be installed from the [official website](https://www.python.org/downloads/). Nothing can be done about this unfortunately. 
class AutopauseTimer:
    """Restartable countdown that pauses the program when it expires.

    After ``timeout`` seconds without being stopped or restarted, the
    worker thread calls the module-level ``pause_handler(True)``.

    :param timeout: Number of seconds to wait before auto pausing.
        Fractional values are honoured; zero or negative values fire
        immediately.
    """

    def __init__(self, timeout):
        # Set when the countdown is cancelled or has already fired.
        self.stop_event = threading.Event()
        self.timeout = timeout
        # Worker thread of the countdown currently (or last) running.
        self.timer_thread = None

    def start(self):
        """Cancel any running countdown and begin a fresh one."""
        self.stop()
        self.stop_event.clear()
        # Daemon thread: a pending countdown must never keep the process
        # alive if the program exits without calling stop().
        self.timer_thread = threading.Thread(target=self._countdown, daemon=True)
        self.timer_thread.start()

    def stop(self):
        """Cancel the running countdown, if any, and wait for its thread.

        Does nothing when no countdown is active or when it already fired.
        """
        thread = self.timer_thread
        if thread is None or self.stop_event.is_set() or not thread.is_alive():
            return
        self.stop_event.set()
        # pause_handler() calls stop() and may run on the timer thread
        # itself; joining the current thread would raise RuntimeError.
        if thread is not threading.current_thread():
            thread.join()

    def _countdown(self):
        """Thread body: wait for the timeout, then trigger the pause."""
        # Event.wait() returns True as soon as stop() sets the event, so a
        # cancel is noticed immediately instead of after a 1 second sleep.
        if self.stop_event.wait(max(self.timeout, 0)):
            return
        # Mark as finished *before* pausing so that the stop() call made
        # inside pause_handler() becomes a no-op for this thread.
        self.stop_event.set()
        pause_handler(True)
+ if auto_pause_handler: + auto_pause_handler.stop() + if is_combo: notifier.send(title='owocr', message=message) logger.info(message) @@ -497,6 +527,9 @@ def process_and_write_results(img_or_path, write_to, notifications, last_result, else: with Path(write_to).open('a', encoding='utf-8') as f: f.write(text + '\n') + + if auto_pause_handler: + auto_pause_handler.start() else: logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name} reported an error after {t1 - t0:0.03f}s: {text}') @@ -519,6 +552,7 @@ def run(read_from=None, ignore_flag=None, delete_images=None, notifications=None, + auto_pause=None, combo_pause=None, combo_engine_switch=None, screen_capture_monitor=None, @@ -542,6 +576,7 @@ def run(read_from=None, :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string). :param delete_images: Delete image files after processing when reading from a directory. :param notifications: Show an operating system notification with the detected text. + :param auto_pause: Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable. :param combo_pause: Specifies a combo to wait on for pausing the program. As an example: "++p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key :param combo_engine_switch: Specifies a combo to wait on for switching the OCR engine. As an example: "++a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key :param screen_capture_monitor: Specifies monitor to target when reading with screen capture. Will be ignored when screen_capture_coords is a window name. 
@@ -597,10 +632,12 @@ def run(read_from=None, global just_unpaused global first_pressed global notifier + global auto_pause_handler terminated = False paused = pause_at_startup just_unpaused = True first_pressed = None + auto_pause_handler = None engine_index = engine_keys.index(default_engine) if default_engine != '' else 0 engine_color = config.get_general('engine_color') delay_secs = config.get_general('delay_secs') @@ -608,6 +645,9 @@ def run(read_from=None, notifier = DesktopNotifierSync() key_combos = {} + if read_from != 'screencapture' and auto_pause != 0: + auto_pause_handler = AutopauseTimer(auto_pause) + if combo_pause != '': key_combos[combo_pause] = pause_handler if combo_engine_switch != '': @@ -977,3 +1017,5 @@ def run(read_from=None, unix_socket_server.shutdown() unix_socket_server_thread.join() key_combo_listener.stop() + if auto_pause_handler: + auto_pause_handler.stop() diff --git a/owocr_config.ini b/owocr_config.ini index 47dc493..9442c35 100644 --- a/owocr_config.ini +++ b/owocr_config.ini @@ -3,6 +3,8 @@ ;engine = glens ;read_from = clipboard ;write_to = clipboard +;note: this specifies an amount of seconds to wait for auto pausing the program after a successful text recognition. Will be ignored when reading with screen capture. 0 to disable. +;auto_pause = 0 ;pause_at_startup = False ;logger_format = {time:HH:mm:ss.SSS} | {message} ;engine_color = cyan