Implemented window screen capture mode (only tested on macOS so far)
This commit is contained in:
@@ -25,7 +25,7 @@ This has been tested with Python 3.11. Newer/older versions might work. It can b
|
|||||||
It mostly functions like Manga OCR: https://github.com/kha-white/manga-ocr?tab=readme-ov-file#running-in-the-background
|
It mostly functions like Manga OCR: https://github.com/kha-white/manga-ocr?tab=readme-ov-file#running-in-the-background
|
||||||
However:
|
However:
|
||||||
- it supports reading images and/or writing text to a websocket when the -r=websocket and/or -w=websocket parameters are specified (port 7331 by default, configurable in the config file)
|
- it supports reading images and/or writing text to a websocket when the -r=websocket and/or -w=websocket parameters are specified (port 7331 by default, configurable in the config file)
|
||||||
- it supports capturing the screen directly with -r screencapture. It will default to the entire first screen every 3 seconds, but a different screen/coordinates/delay can be specified in the config file
|
- it supports capturing the screen directly with -r screencapture. It will default to the entire first screen every 3 seconds, but a different screen/coordinates/window/delay can be specified in the config file
|
||||||
- you can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q"
|
- you can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q"
|
||||||
- you can switch OCR provider with its corresponding keyboard key (refer to the list above). You can also start the script paused with the -p option or with a specific provider with the -e option (refer to `owocr -h` for the list)
|
- you can switch OCR provider with its corresponding keyboard key (refer to the list above). You can also start the script paused with the -p option or with a specific provider with the -e option (refer to `owocr -h` for the list)
|
||||||
- holding ctrl or cmd at any time will pause image processing temporarily
|
- holding ctrl or cmd at any time will pause image processing temporarily
|
||||||
|
|||||||
58
owocr/run.py
58
owocr/run.py
@@ -7,6 +7,7 @@ import fire
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pyperclipfix
|
import pyperclipfix
|
||||||
import mss
|
import mss
|
||||||
|
import pywinctl
|
||||||
import asyncio
|
import asyncio
|
||||||
import websockets
|
import websockets
|
||||||
import queue
|
import queue
|
||||||
@@ -196,6 +197,23 @@ def on_key_release(key):
|
|||||||
first_pressed = None
|
first_pressed = None
|
||||||
|
|
||||||
|
|
||||||
|
# Window-watchdog callback: records whether the captured window is active.
# Registered as isActiveCB in the target_window.watchdog.start(...) call in run().
# The screencapture loop only grabs a frame while this flag is truthy.
def on_window_activated(active):
|
||||||
|
# Rebinds the module-level flag, hence the global declaration.
global screencapture_window_active
|
||||||
|
screencapture_window_active = active
|
||||||
|
|
||||||
|
|
||||||
|
# Window-watchdog callback: keeps the capture region's size in sync with the window.
# Registered as resizedCB in the target_window.watchdog.start(...) call in run().
# size is indexed as (width, height).
def on_window_resized(size):
|
||||||
|
# sct_params is the dict passed to sct.grab(); it is updated in place here.
global sct_params
|
||||||
|
sct_params['width'] = size[0]
|
||||||
|
sct_params['height'] = size[1]
|
||||||
|
|
||||||
|
|
||||||
|
# Window-watchdog callback: keeps the capture region's origin in sync with the window.
# Registered as movedCB in the target_window.watchdog.start(...) call in run().
# pos is indexed as (left, top).
def on_window_moved(pos):
|
||||||
|
# sct_params is the dict passed to sct.grab(); it is updated in place here.
global sct_params
|
||||||
|
sct_params['left'] = pos[0]
|
||||||
|
sct_params['top'] = pos[1]
|
||||||
|
|
||||||
|
|
||||||
def are_images_identical(img1, img2):
|
def are_images_identical(img1, img2):
|
||||||
if None in (img1, img2):
|
if None in (img1, img2):
|
||||||
return img1 == img2
|
return img1 == img2
|
||||||
@@ -273,7 +291,7 @@ def run(read_from='clipboard',
|
|||||||
websocket_port = 7331
|
websocket_port = 7331
|
||||||
notifications = False
|
notifications = False
|
||||||
screen_capture_monitor = 1
|
screen_capture_monitor = 1
|
||||||
screen_capture_coords = 'whole'
|
screen_capture_coords = ''
|
||||||
screen_capture_delay_secs = 3
|
screen_capture_delay_secs = 3
|
||||||
|
|
||||||
if not config:
|
if not config:
|
||||||
@@ -306,7 +324,7 @@ def run(read_from='clipboard',
|
|||||||
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
|
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
|
||||||
|
|
||||||
if config.get_general('screen_capture_coords'):
|
if config.get_general('screen_capture_coords'):
|
||||||
screen_capture_coords = config.get_general('screen_capture_coords').lower()
|
screen_capture_coords = config.get_general('screen_capture_coords')
|
||||||
|
|
||||||
logger.configure(handlers=[{'sink': sys.stderr, 'format': logger_format}])
|
logger.configure(handlers=[{'sink': sys.stderr, 'format': logger_format}])
|
||||||
|
|
||||||
@@ -385,20 +403,48 @@ def run(read_from='clipboard',
|
|||||||
else:
|
else:
|
||||||
generic_clipboard_polling = True
|
generic_clipboard_polling = True
|
||||||
elif read_from == 'screencapture':
|
elif read_from == 'screencapture':
|
||||||
|
global screencapture_window_active
|
||||||
|
screencapture_window_mode = False
|
||||||
|
screencapture_window_active = True
|
||||||
with mss.mss() as sct:
|
with mss.mss() as sct:
|
||||||
mon = sct.monitors
|
mon = sct.monitors
|
||||||
if len(mon) <= screen_capture_monitor:
|
if len(mon) <= screen_capture_monitor:
|
||||||
msg = '"screen_capture_monitor" has to be a valid monitor number!'
|
msg = '"screen_capture_monitor" has to be a valid monitor number!'
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
if screen_capture_coords == 'whole':
|
if screen_capture_coords == '':
|
||||||
coord_left = mon[screen_capture_monitor]["left"]
|
coord_left = mon[screen_capture_monitor]["left"]
|
||||||
coord_top = mon[screen_capture_monitor]["top"]
|
coord_top = mon[screen_capture_monitor]["top"]
|
||||||
coord_width = mon[screen_capture_monitor]["width"]
|
coord_width = mon[screen_capture_monitor]["width"]
|
||||||
coord_height = mon[screen_capture_monitor]["height"]
|
coord_height = mon[screen_capture_monitor]["height"]
|
||||||
else:
|
elif len(screen_capture_coords.split(',')) == 4:
|
||||||
x, y, coord_width, coord_height = [int(c.strip()) for c in screen_capture_coords.split(',')]
|
x, y, coord_width, coord_height = [int(c.strip()) for c in screen_capture_coords.split(',')]
|
||||||
coord_left = mon[screen_capture_monitor]["left"] + x
|
coord_left = mon[screen_capture_monitor]["left"] + x
|
||||||
coord_top = mon[screen_capture_monitor]["top"] + y
|
coord_top = mon[screen_capture_monitor]["top"] + y
|
||||||
|
else:
|
||||||
|
window_titles = pywinctl.getAllTitles()
|
||||||
|
if screen_capture_coords in window_titles:
|
||||||
|
window_title = screen_capture_coords
|
||||||
|
else:
|
||||||
|
for window_title in window_titles:
|
||||||
|
if screen_capture_coords in window_title:
|
||||||
|
break
|
||||||
|
|
||||||
|
windows = pywinctl.getWindowsWithTitle(window_title)
|
||||||
|
if len(windows) == 0:
|
||||||
|
msg = '"screen_capture_coords" has to be empty (for the whole screen), a valid set of coordinates, or a valid window name!'
|
||||||
|
raise ValueError(msg)
|
||||||
|
|
||||||
|
screencapture_window_mode = True
|
||||||
|
target_window = windows[0]
|
||||||
|
coord_top = target_window.top
|
||||||
|
coord_left = target_window.left
|
||||||
|
coord_width = target_window.width
|
||||||
|
coord_height = target_window.height
|
||||||
|
screencapture_window_active = target_window.isActive
|
||||||
|
target_window.watchdog.start(isActiveCB=on_window_activated, resizedCB=on_window_resized, movedCB=on_window_moved)
|
||||||
|
target_window.watchdog.setTryToFind(True)
|
||||||
|
|
||||||
|
global sct_params
|
||||||
sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height, 'mon': screen_capture_monitor}
|
sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height, 'mon': screen_capture_monitor}
|
||||||
|
|
||||||
logger.opt(ansi=True).info(f"Reading with screen capture using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
|
logger.opt(ansi=True).info(f"Reading with screen capture using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
|
||||||
@@ -423,6 +469,8 @@ def run(read_from='clipboard',
|
|||||||
if read_from == 'clipboard' and windows_clipboard_polling:
|
if read_from == 'clipboard' and windows_clipboard_polling:
|
||||||
win32api.PostThreadMessage(windows_clipboard_thread.thread_id, win32con.WM_QUIT, 0, 0)
|
win32api.PostThreadMessage(windows_clipboard_thread.thread_id, win32con.WM_QUIT, 0, 0)
|
||||||
windows_clipboard_thread.join()
|
windows_clipboard_thread.join()
|
||||||
|
if read_from == 'screencapture' and screencapture_window_mode:
|
||||||
|
target_window.watchdog.stop()
|
||||||
user_input_thread.join()
|
user_input_thread.join()
|
||||||
tmp_paused_listener.stop()
|
tmp_paused_listener.stop()
|
||||||
break
|
break
|
||||||
@@ -470,7 +518,7 @@ def run(read_from='clipboard',
|
|||||||
if not windows_clipboard_polling:
|
if not windows_clipboard_polling:
|
||||||
time.sleep(delay_secs)
|
time.sleep(delay_secs)
|
||||||
elif read_from == 'screencapture':
|
elif read_from == 'screencapture':
|
||||||
if not paused and not tmp_paused:
|
if screencapture_window_active and not paused and not tmp_paused:
|
||||||
with mss.mss() as sct:
|
with mss.mss() as sct:
|
||||||
sct_img = sct.grab(sct_params)
|
sct_img = sct.grab(sct_params)
|
||||||
img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX")
|
img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX")
|
||||||
|
|||||||
@@ -12,8 +12,10 @@
|
|||||||
;ignore_flag = False
|
;ignore_flag = False
|
||||||
;delete_images = False
|
;delete_images = False
|
||||||
;screen_capture_monitor = 2
|
;screen_capture_monitor = 2
|
||||||
|
;note: screen_capture_coords can be empty (whole screen), a set of coordinates (x,y,width,height), or a window name (the first matching window title will be used)
|
||||||
|
;screen_capture_coords =
|
||||||
;screen_capture_coords = 400,200,1500,600
|
;screen_capture_coords = 400,200,1500,600
|
||||||
;screen_capture_coords = whole
|
;screen_capture_coords = OBS
|
||||||
;screen_capture_delay_secs = 3
|
;screen_capture_delay_secs = 3
|
||||||
[winrtocr]
|
[winrtocr]
|
||||||
;url = http://aaa.xxx.yyy.zzz:8000
|
;url = http://aaa.xxx.yyy.zzz:8000
|
||||||
|
|||||||
@@ -8,5 +8,6 @@ pynput
|
|||||||
websockets
|
websockets
|
||||||
notify-py
|
notify-py
|
||||||
mss
|
mss
|
||||||
|
pywinctl
|
||||||
pywin32;platform_system=='Windows'
|
pywin32;platform_system=='Windows'
|
||||||
pyobjc;platform_system=='Darwin'
|
pyobjc;platform_system=='Darwin'
|
||||||
Reference in New Issue
Block a user