From b879a6326aaade2120b09a65e408624bb9bd1d7b Mon Sep 17 00:00:00 2001
From: AuroraWright <AuroraWright@users.noreply.github.com>
Date: Fri, 17 Oct 2025 11:38:16 +0200
Subject: [PATCH] Improve screen capture logic

---
 owocr/run.py | 257 +++++++++++++++++++++++++++++----------------------
 1 file changed, 149 insertions(+), 108 deletions(-)

diff --git a/owocr/run.py b/owocr/run.py
index 26b41ca..f05bac5 100644
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -676,7 +676,10 @@ class TextFiltering:
         return changed_lines
 
     def _standalone_furigana_filter(self, result, result_ocr):
-        return self._find_changed_lines_text_impl(result, result_ocr, None, [], None, False, 0)
+        result = self._find_changed_lines_text_impl(result, result_ocr, None, [], None, False, 0)
+        if result is None:  # hand back an empty list instead of None
+            result = []
+        return result
 
     def _find_overlap(self, previous_text, current_text):
         min_overlap_length = 3
@@ -826,8 +829,11 @@ class ScreenshotThread(threading.Thread):
         self.coordinate_selector_combo_enabled = config.get_general('coordinate_selector_combo') != ''
         self.macos_window_tracker_instance = None
         self.windows_window_tracker_instance = None
-        self.screencapture_window_active = True
-        self.screencapture_window_visible = True
+        self.window_active = True
+        self.window_visible = True
+        self.window_closed = False
+        self.window_size = None
+
         if screen_capture_area == '':
             self.screencapture_mode = 0
         elif screen_capture_area.startswith('screen_'):
@@ -875,6 +881,7 @@ class ScreenshotThread(threading.Thread):
                     self.old_macos_screenshot_api = True
                 else:
                     self.old_macos_screenshot_api = False
+                    self.window_stream_configuration = None
                     self.screencapturekit_queue = queue.Queue()
                     CGMainDisplayID()
                 window_list = CGWindowListCopyWindowInfo(kCGWindowListExcludeDesktopElements, kCGNullWindowID)
@@ -913,7 +920,11 @@ class ScreenshotThread(threading.Thread):
                     logger.error(area_invalid_error)
                     sys.exit(1)
 
-                ctypes.windll.shcore.SetProcessDpiAwareness(1)
+                ctypes.windll.shcore.SetProcessDpiAwareness(2)
+                self.window_visible = not win32gui.IsIconic(self.window_handle)
+                self.windows_window_mfc_dc = None
+                self.windows_window_save_dc = None
+                self.windows_window_save_bitmap = None
 
                 self.windows_window_tracker_instance = threading.Thread(target=self.windows_window_tracker)
                 self.windows_window_tracker_instance.start()
@@ -927,8 +938,7 @@ class ScreenshotThread(threading.Thread):
                 if len(screen_capture_window_area.split(',')) == 4:
                     x, y, x2, y2 = [int(c.strip()) for c in screen_capture_window_area.split(',')]
                     logger.info(f'Selected window coordinates: {x},{y},{x2},{y2}')
-                    img = self.take_screenshot()
-                    self.window_area_coordinates = (img.size, (x, y, x2, y2))
+                    self.window_area_coordinates = (x, y, x2, y2)
                 elif screen_capture_window_area == '':
                     self.launch_coordinate_picker(False, False)
                 else:
@@ -962,12 +972,11 @@ class ScreenshotThread(threading.Thread):
             if not found:
                 break
             if self.screen_capture_only_active_windows:
-                self.screencapture_window_active = self.window_handle == win32gui.GetForegroundWindow()
-            else:
-                self.screencapture_window_visible = not win32gui.IsIconic(self.window_handle)
+                self.window_active = self.window_handle == win32gui.GetForegroundWindow()
+            self.window_visible = not win32gui.IsIconic(self.window_handle)
             time.sleep(0.5)
         if not found:
-            on_window_closed(False)
+            self.window_closed = True
 
     def capture_macos_window_screenshot(self, window_id):
         def shareable_content_completion_handler(shareable_content, error):
@@ -985,22 +994,26 @@ class ScreenshotThread(threading.Thread):
                 self.screencapturekit_queue.put(None)
                 return
 
+            if not self.window_stream_configuration:
+                self.window_stream_configuration = SCStreamConfiguration.alloc().init()
+                self.window_stream_configuration.setShowsCursor_(False)
+                self.window_stream_configuration.setCaptureResolution_(SCCaptureResolutionNominal)
+                self.window_stream_configuration.setIgnoreGlobalClipSingleWindow_(True)
+
             with objc.autorelease_pool():
                 content_filter = SCContentFilter.alloc().initWithDesktopIndependentWindow_(target_window)
-
                 frame = content_filter.contentRect()
                 width = frame.size.width
                 height = frame.size.height
-                configuration = SCStreamConfiguration.alloc().init()
-                configuration.setSourceRect_(CGRectMake(0, 0, width, height))
-                configuration.setWidth_(width)
-                configuration.setHeight_(height)
-                configuration.setShowsCursor_(False)
-                configuration.setCaptureResolution_(SCCaptureResolutionNominal)
-                configuration.setIgnoreGlobalClipSingleWindow_(True)
+                current_size = (width, height)
+
+                if current_size != self.window_size:
+                    self.window_stream_configuration.setSourceRect_(CGRectMake(0, 0, width, height))
+                    self.window_stream_configuration.setWidth_(width)
+                    self.window_stream_configuration.setHeight_(height)
 
                 SCScreenshotManager.captureImageWithFilter_configuration_completionHandler_(
-                    content_filter, configuration, capture_image_completion_handler
+                    content_filter, self.window_stream_configuration, capture_image_completion_handler
                 )
 
         def capture_image_completion_handler(image, error):
@@ -1013,6 +1026,10 @@ class ScreenshotThread(threading.Thread):
         SCShareableContent.getShareableContentWithCompletionHandler_(
             shareable_content_completion_handler
         )
+        try:
+            return self.screencapturekit_queue.get(timeout=5)
+        except queue.Empty:
+            return None
 
     def macos_window_tracker(self):
         found = True
@@ -1035,29 +1052,38 @@ class ScreenshotThread(threading.Thread):
                     if len(window_list) > 0:
                         found = True
             if found:
-                self.screencapture_window_active = is_active
+                self.window_active = is_active
             time.sleep(0.5)
         if not found:
-            on_window_closed(False)
+            self.window_closed = True
 
-    def take_screenshot(self):
+    def take_screenshot(self, ignore_active_status):
         if self.screencapture_mode == 2:
+            if self.window_closed:
+                return False
+            if not ignore_active_status and not self.window_active:
+                return None
+            if not self.window_visible:
+                return None
+
+            self.window_size_changed = False
             if sys.platform == 'darwin':
                 with objc.autorelease_pool():
                     if self.old_macos_screenshot_api:
                         cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id], kCGWindowImageBoundsIgnoreFraming | kCGWindowImageNominalResolution)
                     else:
-                        self.capture_macos_window_screenshot(self.window_id)
-                        try:
-                            cg_image = self.screencapturekit_queue.get(timeout=0.5)
-                        except queue.Empty:
-                            cg_image = None
+                        cg_image = self.capture_macos_window_screenshot(self.window_id)
                     if not cg_image:
-                        return None
+                        return False
                     width = CGImageGetWidth(cg_image)
                     height = CGImageGetHeight(cg_image)
                     raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
                     bpr = CGImageGetBytesPerRow(cg_image)
+                    current_size = (width, height)
+                    if self.window_size != current_size:
+                        if self.window_size:
+                            self.window_size_changed = True
+                        self.window_size = current_size
                 img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
             else:
                 try:
@@ -1065,49 +1091,69 @@ class ScreenshotThread(threading.Thread):
                     coord_width = right - coord_left
                     coord_height = bottom - coord_top
 
-                    hwnd_dc = win32gui.GetWindowDC(self.window_handle)
-                    mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
-                    save_dc = mfc_dc.CreateCompatibleDC()
+                    current_size = (coord_width, coord_height)
+                    if self.window_size != current_size:
+                        if self.window_size:
+                            self.window_size_changed = True  # instance flag: it is read after the capture to discard a stale area selection
+                            self.cleanup_window_screen_capture()  # release the DC/bitmap objects sized for the previous window before recreating them
 
-                    save_bitmap = win32ui.CreateBitmap()
-                    save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
-                    save_dc.SelectObject(save_bitmap)
+                        hwnd_dc = win32gui.GetWindowDC(self.window_handle)
+                        self.windows_window_mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
+                        self.windows_window_save_dc = self.windows_window_mfc_dc.CreateCompatibleDC()
+                        self.windows_window_save_bitmap = win32ui.CreateBitmap()
+                        self.windows_window_save_bitmap.CreateCompatibleBitmap(self.windows_window_mfc_dc, coord_width, coord_height)
+                        self.windows_window_save_dc.SelectObject(self.windows_window_save_bitmap)
 
-                    result = ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2)
+                        self.window_size = current_size
+                        win32gui.ReleaseDC(self.window_handle, hwnd_dc)
 
-                    bmpinfo = save_bitmap.GetInfo()
-                    bmpstr = save_bitmap.GetBitmapBits(True)
+                    result = ctypes.windll.user32.PrintWindow(self.window_handle, self.windows_window_save_dc.GetSafeHdc(), 2)
+
+                    bmpinfo = self.windows_window_save_bitmap.GetInfo()
+                    bmpstr = self.windows_window_save_bitmap.GetBitmapBits(True)
+
+                    img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
+                    # No early return here: fall through so the window-area crop after this try block is applied on Windows too
                 except pywintypes.error:
                     return None
-                img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
-                try:
-                    win32gui.DeleteObject(save_bitmap.GetHandle())
-                except:
-                    pass
-                try:
-                    save_dc.DeleteDC()
-                except:
-                    pass
-                try:
-                    mfc_dc.DeleteDC()
-                except:
-                    pass
-                try:
-                    win32gui.ReleaseDC(self.window_handle, hwnd_dc)
-                except:
-                    pass
+
             if self.window_area_coordinates:
-                if img.size != self.window_area_coordinates[0]:
+                if self.window_size_changed:
                     self.window_area_coordinates = None
                     logger.warning('Window size changed, discarding area selection')
                 else:
-                    img = img.crop(self.window_area_coordinates[1])
+                    img = img.crop(self.window_area_coordinates)
         else:
             sct_img = self.sct.grab(self.sct_params)
             img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
 
         return img
 
+    def cleanup_window_screen_capture(self):
+        """Release the cached window-capture objects (best effort) and reset their attributes to None."""
+        if sys.platform == 'win32':
+            try:
+                if self.windows_window_save_bitmap:
+                    win32gui.DeleteObject(self.windows_window_save_bitmap.GetHandle())
+            except Exception:
+                pass
+            self.windows_window_save_bitmap = None
+            try:
+                if self.windows_window_save_dc:
+                    self.windows_window_save_dc.DeleteDC()
+            except Exception:
+                pass
+            self.windows_window_save_dc = None
+            try:
+                if self.windows_window_mfc_dc:
+                    self.windows_window_mfc_dc.DeleteDC()
+            except Exception:
+                pass
+            self.windows_window_mfc_dc = None
+        elif not self.old_macos_screenshot_api:
+            # Only drop the reference: dealloc must never be sent directly to an Objective-C object
+            self.window_stream_configuration = None
+
     def write_result(self, result, is_combo):
         if is_combo:
             image_queue.put((result, True))
@@ -1144,9 +1190,12 @@ class ScreenshotThread(threading.Thread):
             logger.info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
         else:
             self.window_area_coordinates = None
-            img = self.take_screenshot()
             logger.info('Launching window coordinate picker')
-            window_selection = get_screen_selection(img, self.coordinate_selector_combo_enabled)
+            img = self.take_screenshot(True)
+            if not img:
+                window_selection = False
+            else:
+                window_selection = get_screen_selection(img, self.coordinate_selector_combo_enabled)
             if not window_selection:
                 logger.warning('Picker window was closed or an error occurred, selecting whole window')
             else:
@@ -1155,7 +1204,7 @@ class ScreenshotThread(threading.Thread):
                     x2 = x + coord_width
                     y2 = y + coord_height
                     logger.info(f'Selected window coordinates: {x},{y},{x2},{y2}')
-                    self.window_area_coordinates = (img.size, (x, y, x2, y2))
+                    self.window_area_coordinates = (x, y, x2, y2)
                 else:
                     logger.info('Selection is empty, selecting whole window')
 
@@ -1172,13 +1221,14 @@ class ScreenshotThread(threading.Thread):
             except queue.Empty:
                 continue
 
-            img = self.take_screenshot()
-            if not img:
-                self.write_result(0, is_combo)
-                break
-
+            img = self.take_screenshot(False)
             self.write_result(img, is_combo)
 
+            if img is False:  # False = window closed or capture failed; None only means this capture was skipped
+                break
+
+        if self.screencapture_mode == 2:
+            self.cleanup_window_screen_capture()
         if self.macos_window_tracker_instance:
             self.macos_window_tracker_instance.join()
         elif self.windows_window_tracker_instance:
@@ -1280,6 +1330,11 @@ class SecondPassThread:
 class OutputResult:
     def __init__(self):
         self.screen_capture_periodic = config.get_general('screen_capture_delay_secs') != -1
+        self.output_format = config.get_general('output_format')
+        self.engine_color = config.get_general('engine_color')
+        self.verbosity = config.get_general('verbosity')
+        self.notifications = config.get_general('notifications')
+        self.write_to = config.get_general('write_to')
         self.filtering = TextFiltering()
         self.second_pass_thread = SecondPassThread()
 
@@ -1305,8 +1360,6 @@ class OutputResult:
 
     def __call__(self, img_or_path, filter_text, auto_pause, notify):
         engine_index_local = engine_index
-        output_format = config.get_general('output_format')
-        engine_color = config.get_general('engine_color')
         engine_instance = engine_instances[engine_index]
         two_pass_processing_active = False
         result_data = None
@@ -1320,14 +1373,15 @@ class OutputResult:
                 end_time = time.time()
 
                 if not res2:
-                    logger.opt(colors=True).warning(f'<{engine_color}>{engine_instance_2.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {result_data_2}')
+                    logger.opt(colors=True).warning(f'<{self.engine_color}>{engine_instance_2.readable_name}</{self.engine_color}> reported an error after {end_time - start_time:0.03f}s: {result_data_2}')
                 else:
                     changed_lines_count, recovered_lines_count, changed_regions_image = self.filtering._find_changed_lines(img_or_path, result_data_2)
 
                     if changed_lines_count or recovered_lines_count:
-                        logger.opt(colors=True).info(f"<{engine_color}>{engine_instance_2.readable_name}</{engine_color}> found {changed_lines_count + recovered_lines_count} changed line(s) in {end_time - start_time:0.03f}s, re-OCRing with <{engine_color}>{engine_instance.readable_name}</{engine_color}>")
+                        if self.verbosity != 0:
+                            logger.opt(colors=True).info(f"<{self.engine_color}>{engine_instance_2.readable_name}</{self.engine_color}> found {changed_lines_count + recovered_lines_count} changed line(s) in {end_time - start_time:0.03f}s, re-OCRing with <{self.engine_color}>{engine_instance.readable_name}</{self.engine_color}>")
 
-                        if output_format != 'json':
+                        if self.output_format != 'json':
                             if changed_regions_image:
                                 img_or_path = changed_regions_image
 
@@ -1356,10 +1410,9 @@ class OutputResult:
         if not res:
             if auto_pause_handler and auto_pause:
                 auto_pause_handler.stop_timer()
-            logger.opt(colors=True).warning(f'<{engine_color}>{engine_name}</{engine_color}> reported an error after {processing_time:0.03f}s: {result_data}')
+            logger.opt(colors=True).warning(f'<{self.engine_color}>{engine_name}</{self.engine_color}> reported an error after {processing_time:0.03f}s: {result_data}')
             return
 
-        verbosity = config.get_general('verbosity')
         output_string = ''
         log_message = ''
         result_data_text = None
@@ -1367,7 +1420,7 @@ class OutputResult:
         if isinstance(result_data, OcrResult):
             unprocessed_text = self._extract_lines_from_result(result_data)
 
-            if output_format == 'json':
+            if self.output_format == 'json':
                 result_dict = asdict(result_data)
                 output_string = json.dumps(result_dict, ensure_ascii=False)
                 log_message = self._post_process(unprocessed_text, False)
@@ -1390,26 +1443,25 @@ class OutputResult:
                 output_string = self._post_process(result_data_text, False)
             log_message = output_string
 
-        if verbosity != 0:
-            if verbosity < -1:
+        if self.verbosity != 0:
+            if self.verbosity < -1:
                 log_message_terminal = ': ' + log_message
-            elif verbosity == -1:
+            elif self.verbosity == -1:
                 log_message_terminal = ''
             else:
-                log_message_terminal = ': ' + (log_message if len(log_message) <= verbosity else log_message[:verbosity] + '[...]')
+                log_message_terminal = ': ' + (log_message if len(log_message) <= self.verbosity else log_message[:self.verbosity] + '[...]')
 
-            logger.opt(colors=True).info(f'Text recognized in {processing_time:0.03f}s using <{engine_color}>{engine_name}</{engine_color}>{log_message_terminal}')
+            logger.opt(colors=True).info(f'Text recognized in {processing_time:0.03f}s using <{self.engine_color}>{engine_name}</{self.engine_color}>{log_message_terminal}')
 
-        if notify and config.get_general('notifications'):
+        if notify and self.notifications:
             notifier.send(title='owocr', message='Text recognized: ' + log_message, urgency=get_notification_urgency())
 
-        write_to = config.get_general('write_to')
-        if write_to == 'websocket':
+        if self.write_to == 'websocket':
             websocket_server_thread.send_text(output_string)
-        elif write_to == 'clipboard':
+        elif self.write_to == 'clipboard':
             pyperclipfix.copy(output_string)
         else:
-            with Path(write_to).open('a', encoding='utf-8') as f:
+            with Path(self.write_to).open('a', encoding='utf-8') as f:
                 f.write(output_string + '\n')
 
         if auto_pause_handler and auto_pause:
@@ -1456,12 +1508,13 @@ def engine_change_handler(user_input='s', is_combo=True):
         logger.opt(colors=True).info(f'Switched to <{engine_color}>{new_engine_name}</{engine_color}>!')
 
 
-def user_input_thread_run():
-    def _terminate_handler():
-        global terminated
-        logger.info('Terminated!')
-        terminated.set()
+def terminate_handler(sig=None, frame=None):
+    """Log the shutdown and set the module-level `terminated` event; works as a signal handler or when called directly."""
+    logger.info('Terminated!')
+    terminated.set()
 
+
+def user_input_thread_run():
     if sys.platform == 'win32':
         import msvcrt
         while not terminated.is_set():
@@ -1473,7 +1526,7 @@ def user_input_thread_run():
                     user_input_bytes = msvcrt.getch()
                     user_input = user_input_bytes.decode()
                     if user_input.lower() in 'tq':
-                        _terminate_handler()
+                        terminate_handler()
                     elif user_input.lower() == 'p':
                         pause_handler(False)
                     else:
@@ -1502,7 +1555,7 @@ def user_input_thread_run():
                 if rlist:
                     user_input = sys.stdin.read(1)
                     if user_input.lower() in 'tq':
-                        _terminate_handler()
+                        terminate_handler()
                     elif user_input.lower() == 'p':
                         pause_handler(False)
                     else:
@@ -1511,19 +1564,6 @@ def user_input_thread_run():
             termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
 
 
-def signal_handler(sig, frame):
-    global terminated
-    logger.info('Terminated!')
-    terminated.set()
-
-
-def on_window_closed(alive):
-    global terminated
-    if not (alive or terminated):
-        logger.info('Window closed or error occurred, terminated!')
-        terminated.set()
-
-
 def on_screenshot_combo():
     screenshot_request_queue.put(True)
 
@@ -1701,7 +1741,7 @@ def run():
         write_to_readable = f'file {write_to}'
 
     process_queue = (any(i in ('clipboard', 'websocket', 'unixsocket') for i in (read_from, read_from_secondary)) or read_from_path or screen_capture_on_combo)
-    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGINT, terminate_handler)
     if auto_pause != 0:
         auto_pause_handler = AutopauseTimer()
     user_input_thread = threading.Thread(target=user_input_thread_run, daemon=True)
@@ -1727,8 +1767,8 @@ def run():
             except queue.Empty:
                 pass
 
-        if (not img) and screen_capture_periodic:
-            if (not paused.is_set()) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
+        if img is None and screen_capture_periodic:
+            if (not paused.is_set()) and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
                 if periodic_screenshot_queue.empty() and screenshot_request_queue.empty():
                     screenshot_request_queue.put(False)
                 try:
@@ -1739,8 +1779,8 @@ def run():
                     skip_waiting = True
                     pass
 
-        if img == 0:
-            on_window_closed(False)
+        if img == False:
+            logger.info('The window was closed or an error occurred, terminated!')
             terminated.set()
             break
         elif img:
@@ -1753,8 +1793,9 @@ def run():
 
     terminate_selector_if_running()
     user_input_thread.join()
-    auto_pause_handler.stop()
     output_result.second_pass_thread.stop()
+    if auto_pause_handler:
+        auto_pause_handler.stop()
     if websocket_server_thread:
         websocket_server_thread.stop_server()
         websocket_server_thread.join()