diff --git a/README.md b/README.md index b0660d5..784d9be 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,8 @@ Additionally: - You can switch between OCR providers pressing their corresponding keyboard key inside the terminal window (refer to the list of keys in the providers list below) - You can start the script paused with the `-p` option or with a specific provider with the `-e` option (refer to `owocr -h` for the list) - You can specify keyboard combos in the config file to pause/unpause and switch the OCR provider from anywhere (refer to the config file or `owocr -h`) -- You can auto pause the script after a successful text recognition with the `-a=seconds` option if you're not using screen capture. 0 (the default) disables it. -- You can enable notifications in the config file or with `-n` to show the text with a native OS notification. **Important for macOS users:** if you use Python from brew, you need to enter this command in your terminal before the first notification: `codesign -f -s - $(brew --cellar python)/3.*/Frameworks/Python.framework` (works on Ventura/Sonoma). Older macOS versions might require Python to be installed from the [official website](https://www.python.org/downloads/). Nothing can be done about this unfortunately. +- You can auto pause the script after a successful text recognition with the `-a=seconds` option if you're not using screen capture with automatic screenshots. 0 (the default) disables it. +- You can enable notifications in the config file or with `-n` to show the text with a native OS notification if you're not using screen capture with automatic screenshots. **Important for macOS users:** if you use Python from brew, you need to enter this command in your terminal before the first notification: `codesign -f -s - $(brew --cellar python)/3.*/Frameworks/Python.framework` (works on Ventura/Sonoma). Older macOS versions might require Python to be installed from the [official website](https://www.python.org/downloads/). 
Nothing can be done about this unfortunately. - Optionally, you can speed up the online providers by installing fpng-py: `pip install owocr[faster-png]` (requires setting up a developer environment on most operating systems/Python versions) - Optionally, you can improve filtering of non-Japanese text for screen capture by installing transformers and sentencepiece: `pip install owocr[accurate-filtering]` - A config file (which will be automatically created in `user directory/.config/owocr_config.ini`, on Windows `user directory` is the `C:\Users\yourusername` folder) can be used to configure the script, as an example to limit providers (to reduce clutter/memory usage) as well as specifying provider settings such as api keys etc. A sample config file is also provided [here](https://raw.githubusercontent.com/AuroraWright/owocr/master/owocr_config.ini) diff --git a/owocr/config.py b/owocr/config.py index 0391c5f..5d174b3 100644 --- a/owocr/config.py +++ b/owocr/config.py @@ -25,9 +25,9 @@ parser.add_argument('-i', '--ignore_flag', action='store_true', default=argparse parser.add_argument('-d', '--delete_images', action='store_true', default=argparse.SUPPRESS, help='Delete image files after processing when reading from a directory.') parser.add_argument('-n', '--notifications', action='store_true', default=argparse.SUPPRESS, - help='Show an operating system notification with the detected text.') + help='Show an operating system notification with the detected text. Will be ignored when reading with screen capture, unless screen_capture_combo is set.') parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS, - help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.') + help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. 
Will be ignored when reading with screen capture, unless screen_capture_combo is set. 0 to disable.') parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS, help='Combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.SUPPRESS, diff --git a/owocr/run.py b/owocr/run.py index a7b192e..99d3878 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -800,7 +800,7 @@ def on_screenshot_combo(): image_queue.put((img, True)) -def process_and_write_results(img_or_path, last_result, filtering): +def process_and_write_results(img_or_path, last_result, filtering, notify): if auto_pause_handler: auto_pause_handler.stop() @@ -816,7 +816,7 @@ def process_and_write_results(img_or_path, last_result, filtering): text, orig_text = filtering(text, last_result) text = post_process(text) logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}') - if config.get_general('notifications'): + if notify and config.get_general('notifications'): notifier.send(title='owocr', message='Text recognized: ' + text) write_to = config.get_general('write_to') @@ -988,6 +988,7 @@ def run(): if process_queue: try: img, filter_img = image_queue.get(timeout=0.1) + notify = True except queue.Empty: pass @@ -995,6 +996,7 @@ def run(): if (not paused) and screencapture_window_active and screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs: img = take_screenshot() filter_img = True + notify = False last_screenshot_time = time.time() if img == 0: @@ -1003,11 +1005,11 @@ def run(): break elif img: if filter_img: - res = process_and_write_results(img, last_result, filtering) + res = process_and_write_results(img, last_result, filtering, notify) if res: last_result = (res,
engine_index) else: - process_and_write_results(img, None, None) + process_and_write_results(img, None, None, notify) if isinstance(img, Path): if delete_images: Path.unlink(img)