Deprecate X11 window capture, since Wayland is standard almost everywhere now and I have no way to test it

This commit is contained in:
AuroraWright
2025-04-10 01:22:02 +02:00
parent 8dac199a8f
commit 5e4168ef2b
4 changed files with 48 additions and 106 deletions

View File

@@ -13,8 +13,8 @@ Basic usage is comparable to Manga OCR as in, `owocr` keeps scanning for images
Additionally: Additionally:
- Scanning the clipboard takes basically zero system resources on macOS and Windows - Scanning the clipboard takes basically zero system resources on macOS and Windows
- Supports reading images and/or writing text to a websocket with the `-r=websocket` and/or `-w=websocket` parameters (the port is 7331 by default, and is configurable in the config file) - Supports reading images and/or writing text to a websocket with the `-r=websocket` and/or `-w=websocket` parameters (the port is 7331 by default, and is configurable in the config file)
- Supports reading images from a Unix domain socket (`/tmp/owocr.sock`) on macOS and Linux with `-r=unixsocket` - On macOS and Linux, supports reading images from a Unix domain socket (`/tmp/owocr.sock`) with `-r=unixsocket`
- Supports capturing from the screen directly or from a specific window with `-r=screencapture`. By default it will open a coordinate picker so you can select an area of the screen and then read from it every 3 seconds, but you can change it to screenshot the whole screen, a manual set of coordinates `x,y,width,height` or just a specific window (with the window title). You can also change the delay between screenshots or specify a keyboard combo if you don't want screenshots to be taken periodically. Refer to the config file or to `owocr --help` for more details about the screen capture settings - On Windows and macOS, supports capturing from the screen directly or from a specific window with `-r=screencapture`. By default it will open a coordinate picker so you can select an area of the screen and then read from it every 3 seconds, but you can change it to screenshot the whole screen, a manual set of coordinates `x,y,width,height` or just a specific window (with the window title). You can also change the delay between screenshots or specify a keyboard combo if you don't want screenshots to be taken periodically. Refer to the config file or to `owocr --help` for more details about the screen capture settings
- You can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q" inside the terminal window - You can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q" inside the terminal window
- You can switch between OCR providers pressing their corresponding keyboard key inside the terminal window (refer to the list of keys in the providers list below) - You can switch between OCR providers pressing their corresponding keyboard key inside the terminal window (refer to the list of keys in the providers list below)
- You can start the script paused with the `-p` option or with a specific provider with the `-e` option (refer to `owocr -h` for the list) - You can start the script paused with the `-p` option or with a specific provider with the `-e` option (refer to `owocr -h` for the list)

View File

@@ -50,11 +50,6 @@ try:
except ImportError: except ImportError:
pass pass
try:
import pywinctl
except ImportError:
pass
config = None config = None
@@ -512,18 +507,6 @@ def on_window_minimized(minimized):
screencapture_window_visible = not minimized screencapture_window_visible = not minimized
def on_window_resized(size):
    """Record the tracked window's new dimensions in the capture region.

    Passed as ``resizedCB`` to the target window's watchdog, so the
    module-level ``sct_params`` mapping (the region handed to ``sct.grab``)
    keeps following the window.

    :param size: indexable pair; element 0 is stored as the width and
        element 1 as the height.
    """
    # Only the mapping's items are mutated, never the name itself, so a
    # plain module-level lookup is enough here.
    region = sct_params
    region['width'] = size[0]
    region['height'] = size[1]
def on_window_moved(pos):
    """Record the tracked window's new position in the capture region.

    Passed as ``movedCB`` to the target window's watchdog, so the
    module-level ``sct_params`` mapping (the region handed to ``sct.grab``)
    keeps following the window.

    :param pos: indexable pair; element 0 is stored as the left coordinate
        and element 1 as the top coordinate.
    """
    # Only the mapping's items are mutated, never the name itself, so a
    # plain module-level lookup is enough here.
    region = sct_params
    region['left'] = pos[0]
    region['top'] = pos[1]
def normalize_macos_clipboard(img): def normalize_macos_clipboard(img):
ns_data = NSData.dataWithBytes_length_(img, len(img)) ns_data = NSData.dataWithBytes_length_(img, len(img))
ns_image = NSImage.alloc().initWithData_(ns_data) ns_image = NSImage.alloc().initWithData_(ns_data)
@@ -647,14 +630,6 @@ def run(read_from=None,
:param screen_capture_combo: When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key :param screen_capture_combo: When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
""" """
if read_from == 'screencapture' and sys.platform not in ('darwin', 'win32'):
window_capture_available = False
try:
active_window_name = pywinctl.getActiveWindowTitle()
window_capture_available = True
except Exception:
pass
logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}]) logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
if config.has_config: if config.has_config:
@@ -878,38 +853,7 @@ def run(read_from=None,
windows_window_tracker.start() windows_window_tracker.start()
logger.opt(ansi=True).info(f'Selected window: {window_title}') logger.opt(ansi=True).info(f'Selected window: {window_title}')
else: else:
if not window_capture_available: raise ValueError('Window capture is only currently supported on Windows and macOS')
raise ValueError('Window capture is not available on your setup')
sct = mss.mss()
window_title = None
window_titles = pywinctl.getAllTitles()
if screen_capture_area in window_titles:
window_title = screen_capture_area
else:
for t in window_titles:
if screen_capture_area in t and t != active_window_name:
window_title = t
break
if not window_title:
raise ValueError(area_invalid_error)
target_window = pywinctl.getWindowsWithTitle(window_title)[0]
coord_top = target_window.top
coord_left = target_window.left
coord_width = target_window.width
coord_height = target_window.height
if screen_capture_only_active_windows:
screencapture_window_active = target_window.isActive
target_window.watchdog.start(isAliveCB=on_window_closed, isActiveCB=on_window_activated, resizedCB=on_window_resized, movedCB=on_window_moved)
else:
screencapture_window_visible = not target_window.isMinimized
target_window.watchdog.start(isAliveCB=on_window_closed, isMinimizedCB=on_window_minimized, resizedCB=on_window_resized, movedCB=on_window_moved)
sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
logger.opt(ansi=True).info(f'Selected window: {window_title}')
filtering = TextFiltering() filtering = TextFiltering()
read_from_readable = 'screen capture' read_from_readable = 'screen capture'
@@ -1027,51 +971,52 @@ def run(read_from=None,
take_screenshot = screencapture_window_active and not paused take_screenshot = screencapture_window_active and not paused
if take_screenshot and screencapture_window_visible: if take_screenshot and screencapture_window_visible:
if screencapture_mode == 2 and sys.platform == 'darwin': if screencapture_mode == 2:
with objc.autorelease_pool(): if sys.platform == 'darwin':
if old_macos_screenshot_api: with objc.autorelease_pool():
cg_image = CGWindowListCreateImageFromArray(CGRectNull, [window_id], kCGWindowImageBoundsIgnoreFraming) if old_macos_screenshot_api:
else: cg_image = CGWindowListCreateImageFromArray(CGRectNull, [window_id], kCGWindowImageBoundsIgnoreFraming)
capture_macos_window_screenshot(window_id) else:
try: capture_macos_window_screenshot(window_id)
cg_image = screencapturekit_queue.get(timeout=0.5) try:
except queue.Empty: cg_image = screencapturekit_queue.get(timeout=0.5)
cg_image = None except queue.Empty:
if not cg_image: cg_image = None
if not cg_image:
on_window_closed(False)
break
width = CGImageGetWidth(cg_image)
height = CGImageGetHeight(cg_image)
raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
bpr = CGImageGetBytesPerRow(cg_image)
img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
else:
try:
coord_left, coord_top, right, bottom = win32gui.GetWindowRect(window_handle)
coord_width = right - coord_left
coord_height = bottom - coord_top
hwnd_dc = win32gui.GetWindowDC(window_handle)
mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
save_dc = mfc_dc.CreateCompatibleDC()
save_bitmap = win32ui.CreateBitmap()
save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
save_dc.SelectObject(save_bitmap)
result = ctypes.windll.user32.PrintWindow(window_handle, save_dc.GetSafeHdc(), 2)
bmpinfo = save_bitmap.GetInfo()
bmpstr = save_bitmap.GetBitmapBits(True)
except pywintypes.error:
on_window_closed(False) on_window_closed(False)
break break
width = CGImageGetWidth(cg_image) img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
height = CGImageGetHeight(cg_image)
raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
bpr = CGImageGetBytesPerRow(cg_image)
img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
elif screencapture_mode == 2 and sys.platform == 'win32':
try:
coord_left, coord_top, right, bottom = win32gui.GetWindowRect(window_handle)
coord_width = right - coord_left
coord_height = bottom - coord_top
hwnd_dc = win32gui.GetWindowDC(window_handle) win32gui.DeleteObject(save_bitmap.GetHandle())
mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc) save_dc.DeleteDC()
save_dc = mfc_dc.CreateCompatibleDC() mfc_dc.DeleteDC()
win32gui.ReleaseDC(window_handle, hwnd_dc)
save_bitmap = win32ui.CreateBitmap()
save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
save_dc.SelectObject(save_bitmap)
result = ctypes.windll.user32.PrintWindow(window_handle, save_dc.GetSafeHdc(), 2)
bmpinfo = save_bitmap.GetInfo()
bmpstr = save_bitmap.GetBitmapBits(True)
except pywintypes.error:
on_window_closed(False)
break
img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
win32gui.DeleteObject(save_bitmap.GetHandle())
save_dc.DeleteDC()
mfc_dc.DeleteDC()
win32gui.ReleaseDC(window_handle, hwnd_dc)
else: else:
sct_img = sct.grab(sct_params) sct_img = sct.grab(sct_params)
img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX') img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
@@ -1116,11 +1061,9 @@ def run(read_from=None,
if screen_capture_only_active_windows: if screen_capture_only_active_windows:
macos_window_tracker.stop = True macos_window_tracker.stop = True
macos_window_tracker.join() macos_window_tracker.join()
elif sys.platform == 'win32': else:
windows_window_tracker.stop = True windows_window_tracker.stop = True
windows_window_tracker.join() windows_window_tracker.join()
else:
target_window.watchdog.stop()
elif read_from == 'unixsocket': elif read_from == 'unixsocket':
unix_socket_server.shutdown() unix_socket_server.shutdown()
unix_socket_server_thread.join() unix_socket_server_thread.join()

View File

@@ -22,7 +22,7 @@
;screen_capture_area = screen_1 ;screen_capture_area = screen_1
;screen_capture_area = 400,200,1500,600 ;screen_capture_area = 400,200,1500,600
;screen_capture_area = OBS ;screen_capture_area = OBS
;note: if screen_capture_area is a window name, this can be changed to capture inactive windows too. On Linux, the window must then not be covered by other windows! ;note: if screen_capture_area is a window name, this can be changed to capture inactive windows too.
;screen_capture_only_active_windows = True ;screen_capture_only_active_windows = True
;screen_capture_delay_secs = 3 ;screen_capture_delay_secs = 3
;note: this specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key ;note: this specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key

View File

@@ -30,7 +30,6 @@ dependencies = [
"langid", "langid",
"psutil", "psutil",
"requests", "requests",
"pywinctl;platform_system=='Linux'",
"pywin32;platform_system=='Windows'", "pywin32;platform_system=='Windows'",
"pyobjc;platform_system=='Darwin'" "pyobjc;platform_system=='Darwin'"
] ]