diff --git a/README.md b/README.md index 06e9f5a..eb8f272 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ This has been tested with Python 3.11. Newer/older versions might work. It can b It mostly functions like Manga OCR: https://github.com/kha-white/manga-ocr?tab=readme-ov-file#running-in-the-background However: - it supports reading images and/or writing text to a websocket when the -r=websocket and/or -w=websocket parameters are specified (port 7331 by default, configurable in the config file) -- it supports capturing the screen directly with -r screencapture. It will default to the entire first screen every 3 seconds, but a different screen/coordinates/delay can be specified in the config file +- it supports capturing the screen directly with -r screencapture. It will default to the entire first screen every 3 seconds, but a different screen/coordinates/window/delay can be specified in the config file - you can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q" - you can switch OCR provider with its corresponding keyboard key (refer to the list above). 
def on_window_activated(active):
    """Record whether the captured window is currently active.

    Registered as the ``isActiveCB`` callback of the target window's
    watchdog. The screen-capture loop only grabs a frame while
    ``screencapture_window_active`` is truthy.

    Args:
        active: Activation state reported by the watchdog.
    """
    global screencapture_window_active
    screencapture_window_active = active


def _update_capture_region(**changes):
    """Apply *changes* to the shared ``sct_params`` capture region, if set."""
    # The watchdog is started before ``sct_params`` is first assigned, so an
    # event delivered that early would hit an undefined global. Such an event
    # is dropped instead of raising NameError inside the callback.
    region = globals().get('sct_params')
    if region is not None:
        region.update(changes)


def on_window_resized(size):
    """Track a size change of the captured window.

    Registered as the ``resizedCB`` callback of the target window's watchdog.
    Updates the ``width`` and ``height`` entries of ``sct_params``, the
    dictionary later handed to ``sct.grab``.

    Args:
        size: Indexable pair; ``size[0]`` is the width, ``size[1]`` the height.
    """
    _update_capture_region(width=size[0], height=size[1])


def on_window_moved(pos):
    """Track a position change of the captured window.

    Registered as the ``movedCB`` callback of the target window's watchdog.
    Updates the ``left`` and ``top`` entries of ``sct_params``, the
    dictionary later handed to ``sct.grab``.

    Args:
        pos: Indexable pair; ``pos[0]`` is the left edge, ``pos[1]`` the top.
    """
    _update_capture_region(left=pos[0], top=pos[1])
raise ValueError(msg) - if screen_capture_coords == 'whole': + if screen_capture_coords == '': coord_left = mon[screen_capture_monitor]["left"] coord_top = mon[screen_capture_monitor]["top"] coord_width = mon[screen_capture_monitor]["width"] coord_height = mon[screen_capture_monitor]["height"] - else: + elif len(screen_capture_coords.split(',')) == 4: x, y, coord_width, coord_height = [int(c.strip()) for c in screen_capture_coords.split(',')] coord_left = mon[screen_capture_monitor]["left"] + x coord_top = mon[screen_capture_monitor]["top"] + y + else: + window_titles = pywinctl.getAllTitles() + if screen_capture_coords in window_titles: + window_title = screen_capture_coords + else: + for window_title in window_titles: + if screen_capture_coords in window_title: + break + + windows = pywinctl.getWindowsWithTitle(window_title) + if len(windows) == 0: + msg = '"screen_capture_coords" has to be empty (for the whole screen), a valid set of coordinates, or a valid window name!' + raise ValueError(msg) + + screencapture_window_mode = True + target_window = windows[0] + coord_top = target_window.top + coord_left = target_window.left + coord_width = target_window.width + coord_height = target_window.height + screencapture_window_active = target_window.isActive + target_window.watchdog.start(isActiveCB=on_window_activated, resizedCB=on_window_resized, movedCB=on_window_moved) + target_window.watchdog.setTryToFind(True) + + global sct_params sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height, 'mon': screen_capture_monitor} logger.opt(ansi=True).info(f"Reading with screen capture using <{engine_color}>{engine_instances[engine_index].readable_name}{' (paused)' if paused else ''}") @@ -423,6 +469,8 @@ def run(read_from='clipboard', if read_from == 'clipboard' and windows_clipboard_polling: win32api.PostThreadMessage(windows_clipboard_thread.thread_id, win32con.WM_QUIT, 0, 0) windows_clipboard_thread.join() + if read_from == 
'screencapture' and screencapture_window_mode: + target_window.watchdog.stop() user_input_thread.join() tmp_paused_listener.stop() break @@ -470,7 +518,7 @@ def run(read_from='clipboard', if not windows_clipboard_polling: time.sleep(delay_secs) elif read_from == 'screencapture': - if not paused and not tmp_paused: + if screencapture_window_active and not paused and not tmp_paused: with mss.mss() as sct: sct_img = sct.grab(sct_params) img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX") diff --git a/owocr_config.ini b/owocr_config.ini index 818b121..912aba2 100644 --- a/owocr_config.ini +++ b/owocr_config.ini @@ -12,8 +12,10 @@ ;ignore_flag = False ;delete_images = False ;screen_capture_monitor = 2 +;note: screen_capture_coords can be empty (whole screen), have a set of coordinates (x,y,width,height) or a window name (the first matching window title will be used) +;screen_capture_coords = ;screen_capture_coords = 400,200,1500,600 -;screen_capture_coords = whole +;screen_capture_coords = OBS ;screen_capture_delay_secs = 3 [winrtocr] ;url = http://aaa.xxx.yyy.zzz:8000 diff --git a/requirements.txt b/requirements.txt index 70ed981..92610de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,5 +8,6 @@ pynput websockets notify-py mss +pywinctl pywin32;platform_system=='Windows' pyobjc;platform_system=='Darwin' \ No newline at end of file diff --git a/setup.py b/setup.py index 55030b2..6120eb8 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ setup( "websockets", "notify-py", "mss", + "pywinctl", "pywin32;platform_system=='Windows'", "pyobjc;platform_system=='Darwin'" ],