From b879a6326aaade2120b09a65e408624bb9bd1d7b Mon Sep 17 00:00:00 2001 From: AuroraWright Date: Fri, 17 Oct 2025 11:38:16 +0200 Subject: [PATCH] Improve screen capture logic --- owocr/run.py | 257 +++++++++++++++++++++++++++++---------------------- 1 file changed, 149 insertions(+), 108 deletions(-) diff --git a/owocr/run.py b/owocr/run.py index 26b41ca..f05bac5 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -676,7 +676,10 @@ class TextFiltering: return changed_lines def _standalone_furigana_filter(self, result, result_ocr): - return self._find_changed_lines_text_impl(result, result_ocr, None, [], None, False, 0) + result = self._find_changed_lines_text_impl(result, result_ocr, None, [], None, False, 0) + if result == None: + result = [] + return result def _find_overlap(self, previous_text, current_text): min_overlap_length = 3 @@ -826,8 +829,11 @@ class ScreenshotThread(threading.Thread): self.coordinate_selector_combo_enabled = config.get_general('coordinate_selector_combo') != '' self.macos_window_tracker_instance = None self.windows_window_tracker_instance = None - self.screencapture_window_active = True - self.screencapture_window_visible = True + self.window_active = True + self.window_visible = True + self.window_closed = False + self.window_size = None + if screen_capture_area == '': self.screencapture_mode = 0 elif screen_capture_area.startswith('screen_'): @@ -875,6 +881,7 @@ class ScreenshotThread(threading.Thread): self.old_macos_screenshot_api = True else: self.old_macos_screenshot_api = False + self.window_stream_configuration = None self.screencapturekit_queue = queue.Queue() CGMainDisplayID() window_list = CGWindowListCopyWindowInfo(kCGWindowListExcludeDesktopElements, kCGNullWindowID) @@ -913,7 +920,11 @@ class ScreenshotThread(threading.Thread): logger.error(area_invalid_error) sys.exit(1) - ctypes.windll.shcore.SetProcessDpiAwareness(1) + ctypes.windll.shcore.SetProcessDpiAwareness(2) + self.window_visible = not win32gui.IsIconic(self.window_handle) + self.windows_window_mfc_dc = None + self.windows_window_save_dc = None + self.windows_window_save_bitmap = None self.windows_window_tracker_instance = threading.Thread(target=self.windows_window_tracker) self.windows_window_tracker_instance.start() @@ -927,8 +938,7 @@ class ScreenshotThread(threading.Thread): if len(screen_capture_window_area.split(',')) == 4: x, y, x2, y2 = [int(c.strip()) for c in screen_capture_window_area.split(',')] logger.info(f'Selected window coordinates: {x},{y},{x2},{y2}') - img = self.take_screenshot() - self.window_area_coordinates = (img.size, (x, y, x2, y2)) + self.window_area_coordinates = (x, y, x2, y2) elif screen_capture_window_area == '': self.launch_coordinate_picker(False, False) else: @@ -962,12 +972,11 @@ class ScreenshotThread(threading.Thread): if not found: break if self.screen_capture_only_active_windows: - self.screencapture_window_active = self.window_handle == win32gui.GetForegroundWindow() - else: - self.screencapture_window_visible = not win32gui.IsIconic(self.window_handle) + self.window_active = self.window_handle == win32gui.GetForegroundWindow() + self.window_visible = not win32gui.IsIconic(self.window_handle) time.sleep(0.5) if not found: - on_window_closed(False) + self.window_closed = True def capture_macos_window_screenshot(self, window_id): def shareable_content_completion_handler(shareable_content, error): @@ -985,22 +994,26 @@ class ScreenshotThread(threading.Thread): self.screencapturekit_queue.put(None) return + if not self.window_stream_configuration: + self.window_stream_configuration = SCStreamConfiguration.alloc().init() + self.window_stream_configuration.setShowsCursor_(False) + self.window_stream_configuration.setCaptureResolution_(SCCaptureResolutionNominal) + self.window_stream_configuration.setIgnoreGlobalClipSingleWindow_(True) + with objc.autorelease_pool(): content_filter = SCContentFilter.alloc().initWithDesktopIndependentWindow_(target_window) - frame = content_filter.contentRect() width = frame.size.width height = frame.size.height - configuration = SCStreamConfiguration.alloc().init() - configuration.setSourceRect_(CGRectMake(0, 0, width, height)) - configuration.setWidth_(width) - configuration.setHeight_(height) - configuration.setShowsCursor_(False) - configuration.setCaptureResolution_(SCCaptureResolutionNominal) - configuration.setIgnoreGlobalClipSingleWindow_(True) + current_size = (width, height) + + if current_size != self.window_size: + self.window_stream_configuration.setSourceRect_(CGRectMake(0, 0, width, height)) + self.window_stream_configuration.setWidth_(width) + self.window_stream_configuration.setHeight_(height) SCScreenshotManager.captureImageWithFilter_configuration_completionHandler_( - content_filter, configuration, capture_image_completion_handler + content_filter, self.window_stream_configuration, capture_image_completion_handler ) def capture_image_completion_handler(image, error): @@ -1013,6 +1026,10 @@ class ScreenshotThread(threading.Thread): SCShareableContent.getShareableContentWithCompletionHandler_( shareable_content_completion_handler ) + try: + return self.screencapturekit_queue.get(timeout=5) + except queue.Empty: + return None def macos_window_tracker(self): found = True @@ -1035,29 +1052,38 @@ class ScreenshotThread(threading.Thread): if len(window_list) > 0: found = True if found: - self.screencapture_window_active = is_active + self.window_active = is_active time.sleep(0.5) if not found: - on_window_closed(False) + self.window_closed = True - def take_screenshot(self): + def take_screenshot(self, ignore_active_status): if self.screencapture_mode == 2: + if self.window_closed: + return False + if not ignore_active_status and not self.window_active: + return None + if not self.window_visible: + return None + + self.window_size_changed = False if sys.platform == 'darwin': with objc.autorelease_pool(): if self.old_macos_screenshot_api: cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id], kCGWindowImageBoundsIgnoreFraming | kCGWindowImageNominalResolution) else: - self.capture_macos_window_screenshot(self.window_id) - try: - cg_image = self.screencapturekit_queue.get(timeout=0.5) - except queue.Empty: - cg_image = None + cg_image = self.capture_macos_window_screenshot(self.window_id) if not cg_image: - return None + return False width = CGImageGetWidth(cg_image) height = CGImageGetHeight(cg_image) raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image)) bpr = CGImageGetBytesPerRow(cg_image) + current_size = (width, height) + if self.window_size != current_size: + if self.window_size: + self.window_size_changed = True + self.window_size = current_size img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1) else: try: @@ -1065,49 +1091,69 @@ class ScreenshotThread(threading.Thread): coord_width = right - coord_left coord_height = bottom - coord_top - hwnd_dc = win32gui.GetWindowDC(self.window_handle) - mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc) - save_dc = mfc_dc.CreateCompatibleDC() + current_size = (coord_width, coord_height) + if self.window_size != current_size: + if self.window_size: + window_size_changed = True + self.reset_windows_window() - save_bitmap = win32ui.CreateBitmap() - save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height) - save_dc.SelectObject(save_bitmap) + hwnd_dc = win32gui.GetWindowDC(self.window_handle) + self.windows_window_mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc) + self.windows_window_save_dc = self.windows_window_mfc_dc.CreateCompatibleDC() + self.windows_window_save_bitmap = win32ui.CreateBitmap() + self.windows_window_save_bitmap.CreateCompatibleBitmap(self.windows_window_mfc_dc, coord_width, coord_height) + self.windows_window_save_dc.SelectObject(self.windows_window_save_bitmap) - result = ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2) + self.window_size = current_size + win32gui.ReleaseDC(self.window_handle, hwnd_dc) - bmpinfo = save_bitmap.GetInfo() - bmpstr = save_bitmap.GetBitmapBits(True) + result = ctypes.windll.user32.PrintWindow(self.window_handle, self.windows_window_save_dc.GetSafeHdc(), 2) + + bmpinfo = self.windows_window_save_bitmap.GetInfo() + bmpstr = self.windows_window_save_bitmap.GetBitmapBits(True) + + img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1) + return img except pywintypes.error: return None - img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1) - try: - win32gui.DeleteObject(save_bitmap.GetHandle()) - except: - pass - try: - save_dc.DeleteDC() - except: - pass - try: - mfc_dc.DeleteDC() - except: - pass - try: - win32gui.ReleaseDC(self.window_handle, hwnd_dc) - except: - pass + if self.window_area_coordinates: - if img.size != self.window_area_coordinates[0]: + if self.window_size_changed: self.window_area_coordinates = None logger.warning('Window size changed, discarding area selection') else: - img = img.crop(self.window_area_coordinates[1]) + img = img.crop(self.window_area_coordinates) else: sct_img = self.sct.grab(self.sct_params) img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX') return img + def cleanup_window_screen_capture(self): + if sys.platform == 'win32': + try: + if self.windows_window_save_bitmap: + win32gui.DeleteObject(self.windows_window_save_bitmap.GetHandle()) + self.windows_window_save_bitmap = None + except: + pass + try: + if self.windows_window_save_dc: + self.windows_window_save_dc.DeleteDC() + self.windows_window_save_dc = None + except: + pass + try: + if self.windows_window_mfc_dc: + self.windows_window_mfc_dc.DeleteDC() + self.windows_window_mfc_dc = None + except: + pass + elif not self.old_macos_screenshot_api: + if self.window_stream_configuration: + self.window_stream_configuration.dealloc() + self.window_stream_configuration = None + def write_result(self, result, is_combo): if is_combo: image_queue.put((result, True)) @@ -1144,9 +1190,12 @@ class ScreenshotThread(threading.Thread): logger.info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}') else: self.window_area_coordinates = None - img = self.take_screenshot() logger.info('Launching window coordinate picker') - window_selection = get_screen_selection(img, self.coordinate_selector_combo_enabled) + img = self.take_screenshot(True) + if not img: + window_selection = False + else: + window_selection = get_screen_selection(img, self.coordinate_selector_combo_enabled) if not window_selection: logger.warning('Picker window was closed or an error occurred, selecting whole window') else: @@ -1155,7 +1204,7 @@ class ScreenshotThread(threading.Thread): x2 = x + coord_width y2 = y + coord_height logger.info(f'Selected window coordinates: {x},{y},{x2},{y2}') - self.window_area_coordinates = (img.size, (x, y, x2, y2)) + self.window_area_coordinates = (x, y, x2, y2) else: logger.info('Selection is empty, selecting whole window') @@ -1172,13 +1221,14 @@ class ScreenshotThread(threading.Thread): except queue.Empty: continue - img = self.take_screenshot() - if not img: - self.write_result(0, is_combo) - break - + img = self.take_screenshot(False) self.write_result(img, is_combo) + if img == False: + break + + if self.screencapture_mode == 2: + self.cleanup_window_screen_capture() if self.macos_window_tracker_instance: self.macos_window_tracker_instance.join() elif self.windows_window_tracker_instance: @@ -1280,6 +1330,11 @@ class SecondPassThread: class OutputResult: def __init__(self): self.screen_capture_periodic = config.get_general('screen_capture_delay_secs') != -1 + self.output_format = config.get_general('output_format') + self.engine_color = config.get_general('engine_color') + self.verbosity = config.get_general('verbosity') + self.notifications = config.get_general('notifications') + self.write_to = config.get_general('write_to') self.filtering = TextFiltering() self.second_pass_thread = SecondPassThread() @@ -1305,8 +1360,6 @@ class OutputResult: def __call__(self, img_or_path, filter_text, auto_pause, notify): engine_index_local = engine_index - output_format = config.get_general('output_format') - engine_color = config.get_general('engine_color') engine_instance = engine_instances[engine_index] two_pass_processing_active = False result_data = None @@ -1320,14 +1373,15 @@ class OutputResult: end_time = time.time() if not res2: - logger.opt(colors=True).warning(f'<{engine_color}>{engine_instance_2.readable_name} reported an error after {end_time - start_time:0.03f}s: {result_data_2}') + logger.opt(colors=True).warning(f'<{self.engine_color}>{engine_instance_2.readable_name} reported an error after {end_time - start_time:0.03f}s: {result_data_2}') else: changed_lines_count, recovered_lines_count, changed_regions_image = self.filtering._find_changed_lines(img_or_path, result_data_2) if changed_lines_count or recovered_lines_count: - logger.opt(colors=True).info(f"<{engine_color}>{engine_instance_2.readable_name} found {changed_lines_count + recovered_lines_count} changed line(s) in {end_time - start_time:0.03f}s, re-OCRing with <{engine_color}>{engine_instance.readable_name}") + if self.verbosity != 0: + logger.opt(colors=True).info(f"<{self.engine_color}>{engine_instance_2.readable_name} found {changed_lines_count + recovered_lines_count} changed line(s) in {end_time - start_time:0.03f}s, re-OCRing with <{self.engine_color}>{engine_instance.readable_name}") - if output_format != 'json': + if self.output_format != 'json': if changed_regions_image: img_or_path = changed_regions_image @@ -1356,10 +1410,9 @@ class OutputResult: if not res: if auto_pause_handler and auto_pause: auto_pause_handler.stop_timer() - logger.opt(colors=True).warning(f'<{engine_color}>{engine_name} reported an error after {processing_time:0.03f}s: {result_data}') + logger.opt(colors=True).warning(f'<{self.engine_color}>{engine_name} reported an error after {processing_time:0.03f}s: {result_data}') return - verbosity = config.get_general('verbosity') output_string = '' log_message = '' result_data_text = None @@ -1367,7 +1420,7 @@ class OutputResult: if isinstance(result_data, OcrResult): unprocessed_text = self._extract_lines_from_result(result_data) - if output_format == 'json': + if self.output_format == 'json': result_dict = asdict(result_data) output_string = json.dumps(result_dict, ensure_ascii=False) log_message = self._post_process(unprocessed_text, False) @@ -1390,26 +1443,25 @@ class OutputResult: output_string = self._post_process(result_data_text, False) log_message = output_string - if verbosity != 0: - if verbosity < -1: + if self.verbosity != 0: + if self.verbosity < -1: log_message_terminal = ': ' + log_message - elif verbosity == -1: + elif self.verbosity == -1: log_message_terminal = '' else: - log_message_terminal = ': ' + (log_message if len(log_message) <= verbosity else log_message[:verbosity] + '[...]') + log_message_terminal = ': ' + (log_message if len(log_message) <= self.verbosity else log_message[:self.verbosity] + '[...]') - logger.opt(colors=True).info(f'Text recognized in {processing_time:0.03f}s using <{engine_color}>{engine_name}{log_message_terminal}') + logger.opt(colors=True).info(f'Text recognized in {processing_time:0.03f}s using <{self.engine_color}>{engine_name}{log_message_terminal}') - if notify and config.get_general('notifications'): + if notify and self.notifications: notifier.send(title='owocr', message='Text recognized: ' + log_message, urgency=get_notification_urgency()) - write_to = config.get_general('write_to') - if write_to == 'websocket': + if self.write_to == 'websocket': websocket_server_thread.send_text(output_string) - elif write_to == 'clipboard': + elif self.write_to == 'clipboard': pyperclipfix.copy(output_string) else: - with Path(write_to).open('a', encoding='utf-8') as f: + with Path(self.write_to).open('a', encoding='utf-8') as f: f.write(output_string + '\n') if auto_pause_handler and auto_pause: @@ -1456,12 +1508,13 @@ def engine_change_handler(user_input='s', is_combo=True): logger.opt(colors=True).info(f'Switched to <{engine_color}>{new_engine_name}!') -def user_input_thread_run(): - def _terminate_handler(): - global terminated - logger.info('Terminated!') - terminated.set() +def terminate_handler(sig=None, frame=None): + global terminated + logger.info('Terminated!') + terminated.set() + +def user_input_thread_run(): if sys.platform == 'win32': import msvcrt while not terminated.is_set(): @@ -1473,7 +1526,7 @@ def user_input_thread_run(): user_input_bytes = msvcrt.getch() user_input = user_input_bytes.decode() if user_input.lower() in 'tq': - _terminate_handler() + terminate_handler() elif user_input.lower() == 'p': pause_handler(False) else: @@ -1502,7 +1555,7 @@ def user_input_thread_run(): if rlist: user_input = sys.stdin.read(1) if user_input.lower() in 'tq': - _terminate_handler() + terminate_handler() elif user_input.lower() == 'p': pause_handler(False) else: @@ -1511,19 +1564,6 @@ def user_input_thread_run(): termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) -def signal_handler(sig, frame): - global terminated - logger.info('Terminated!') - terminated.set() - - -def on_window_closed(alive): - global terminated - if not (alive or terminated): - logger.info('Window closed or error occurred, terminated!') - terminated.set() - - def on_screenshot_combo(): screenshot_request_queue.put(True) @@ -1701,7 +1741,7 @@ def run(): write_to_readable = f'file {write_to}' process_queue = (any(i in ('clipboard', 'websocket', 'unixsocket') for i in (read_from, read_from_secondary)) or read_from_path or screen_capture_on_combo) - signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGINT, terminate_handler) if auto_pause != 0: auto_pause_handler = AutopauseTimer() user_input_thread = threading.Thread(target=user_input_thread_run, daemon=True) @@ -1727,8 +1767,8 @@ def run(): except queue.Empty: pass - if (not img) and screen_capture_periodic: - if (not paused.is_set()) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs: + if img == None and screen_capture_periodic: + if (not paused.is_set()) and (time.time() - last_screenshot_time) > screen_capture_delay_secs: if periodic_screenshot_queue.empty() and screenshot_request_queue.empty(): screenshot_request_queue.put(False) try: @@ -1739,8 +1779,8 @@ def run(): skip_waiting = True pass - if img == 0: - on_window_closed(False) + if img == False: + logger.info('The window was closed or an error occurred, terminated!') terminated.set() break elif img: @@ -1753,8 +1793,9 @@ def run(): terminate_selector_if_running() user_input_thread.join() - auto_pause_handler.stop() output_result.second_pass_thread.stop() + if auto_pause_handler: + auto_pause_handler.stop() if websocket_server_thread: websocket_server_thread.stop_server() websocket_server_thread.join()