Add an option to wait for a combo to take screenshots instead of waiting for a delay

2024-01-30 14:40:24 +01:00
parent dff32e61fd
commit 1b88f2a6c5
4 changed files with 40 additions and 11 deletions
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ This has been tested with Python 3.11. Newer/older versions might work. It can b
 It mostly functions like Manga OCR: https://github.com/kha-white/manga-ocr?tab=readme-ov-file#running-in-the-background
 However:
 - it supports reading images and/or writing text to a websocket when the -r=websocket and/or -w=websocket parameters are specified (port 7331 by default, configurable in the config file)
- it supports capturing the screen directly with -r screencapture. It will default to the entire first screen every 3 seconds, but a different screen/coordinates/window/delay can be specified in the config file
+- it supports capturing the screen directly with -r screencapture. It will default to the entire first screen every 3 seconds, but a different screen/coordinates/window/delay can be specified in the config file. Instead of using a delay, it's also possible to specify a keyboard combo that triggers a screenshot when pressed (refer to the config file or the help page)
 - you can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q"
 - you can switch OCR provider with its corresponding keyboard key (refer to the list above). You can also start the script paused with the -p option or with a specific provider with the -e option (refer to `owocr -h` for the list)
 - holding ctrl or cmd at any time will pause image processing temporarily
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -22,7 +22,8 @@ class Config:
        'screen_capture_monitor': 1,
        'screen_capture_coords': '',
        'screen_capture_delay_secs': 3,
-        'screen_capture_only_active_windows': True
+        'screen_capture_only_active_windows': True,
+        'screen_capture_combo': ''
    }

    def __parse(self, value):
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -199,6 +199,11 @@ def on_key_release(key):
        first_pressed = None


+def on_screenshot_combo():
+    if not paused:
+        screenshot_event.set()
+
+
 def signal_handler(sig, frame):
    global terminated
    logger.info('Terminated!')
@@ -293,7 +298,8 @@ def run(read_from=None,
        screen_capture_monitor=None,
        screen_capture_coords=None,
        screen_capture_delay_secs=None,
-        screen_capture_only_active_windows=None
+        screen_capture_only_active_windows=None,
+        screen_capture_combo=None
        ):
    """
    Japanese OCR client
@@ -313,6 +319,7 @@ def run(read_from=None,
    :param screen_capture_coords: Specifies area to target when reading with screen capture. Can be either empty (whole screen), a set of coordinates (x,y,width,height) or a window name (the first matching window title will be used).
    :param screen_capture_delay_secs: Specifies the delay (in seconds) between screenshots when reading with screen capture.
    :param screen_capture_only_active_windows: When reading with screen capture and screen_capture_coords is a window name, specifies whether to only target the window while it's active.
+    :param screen_capture_combo: When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
    """

    if read_from == 'screencapture':
@@ -365,15 +372,11 @@ def run(read_from=None,
    engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
    engine_color = config.get_general('engine_color')
    delay_secs = config.get_general('delay_secs')
+    screen_capture_on_combo = False

    user_input_thread = threading.Thread(target=user_input_thread_run, args=(engine_instances, engine_keys), daemon=True)
    user_input_thread.start()

-    tmp_paused_listener = keyboard.Listener(
-        on_press=on_key_press,
-        on_release=on_key_release)
-    tmp_paused_listener.start()
-
    if read_from == 'websocket' or write_to == 'websocket':
        global websocket_server_thread
        websocket_server_thread = WebsocketServerThread(read_from == 'websocket')
@@ -406,6 +409,10 @@ def run(read_from=None,
        else:
            generic_clipboard_polling = True
    elif read_from == 'screencapture':
+        if screen_capture_combo != '':
+            screen_capture_on_combo = True
+            global screenshot_event
+            screenshot_event = threading.Event()
        if type(screen_capture_coords) == tuple:
            screen_capture_coords = ','.join(map(str, screen_capture_coords))
        global screencapture_window_active
@@ -472,6 +479,15 @@ def run(read_from=None,
            if path.suffix.lower() in allowed_extensions:
                old_paths.add(get_path_key(path))

+    if screen_capture_on_combo:
+        tmp_paused_listener = keyboard.GlobalHotKeys({
+            screen_capture_combo: on_screenshot_combo})
+    else:
+        tmp_paused_listener = keyboard.Listener(
+            on_press=on_key_press,
+            on_release=on_key_release)
+    tmp_paused_listener.start()
+
    signal.signal(signal.SIGINT, signal_handler)
    while not terminated:
        if read_from == 'websocket':
@@ -517,13 +533,23 @@ def run(read_from=None,
            if not windows_clipboard_polling:
                time.sleep(delay_secs)
        elif read_from == 'screencapture':
-            if screencapture_window_active and screencapture_window_visible and not paused and not tmp_paused:
+            if screen_capture_on_combo:
+                take_screenshot = screenshot_event.wait(delay_secs)
+                if take_screenshot:
+                    screenshot_event.clear()
+            else:
+                take_screenshot = screencapture_window_active and not (paused or tmp_paused)
+
+            if take_screenshot and screencapture_window_visible:
                sct_img = sct.grab(sct_params)
                img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
                process_and_write_results(engine_instances[engine_index], img, write_to)
-                time.sleep(screen_capture_delay_secs)
+                delay = screen_capture_delay_secs
            else:
-                time.sleep(delay_secs)
+                delay = delay_secs
+
+            if not screen_capture_on_combo:
+                time.sleep(delay)
        else:
            for path in read_from.iterdir():
                if path.suffix.lower() in allowed_extensions:
--- a/owocr_config.ini
+++ b/owocr_config.ini
@@ -19,6 +19,8 @@
 ;note: if screen_capture_coords is a window name, this can be changed to capture inactive windows too. In that case, make sure the window is not covered by other windows!
 ;screen_capture_only_active_windows = True
 ;screen_capture_delay_secs = 3
+;note: this specifies a keyboard combo that triggers a screenshot when pressed, instead of taking one every screen_capture_delay_secs seconds. Example: <ctrl>+<shift>+s. The list of key names can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
+;screen_capture_combo = <ctrl>+<shift>+s
 [winrtocr]
 ;url = http://aaa.xxx.yyy.zzz:8000
 [azure]