From 2cb828d1dafb24c61ab9ae7c2a0c1ce8f5a8089e Mon Sep 17 00:00:00 2001
From: AuroraWright <AuroraWright@users.noreply.github.com>
Date: Mon, 16 Jun 2025 16:41:08 +0200
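Subject: [PATCH] Fix Linux crashes

Take screenshots on a dedicated daemon thread (ScreenshotThread) that
creates its own mss instance inside run(), is woken through a
threading.Event, and hands results back through a queue.

input_to_pil_image() now also returns whether the image was opened from a
Path; OCR engines only close images that were opened from a path, and the
_preprocess() helpers no longer close the source image after resizing.

Bump version to 1.14.1.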
---
 owocr/ocr.py   |  82 +++++++++++++-----------
 owocr/run.py   | 169 +++++++++++++++++++++++++++----------------------
 pyproject.toml |   2 +-
 3 files changed, 139 insertions(+), 114 deletions(-)

diff --git a/owocr/ocr.py b/owocr/ocr.py
index fc9532e..b99a650 100644
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -97,11 +97,13 @@ def post_process(text):
 
 
 def input_to_pil_image(img):
+    is_path = False
     if isinstance(img, Image.Image):
         pil_image = img
     elif isinstance(img, (bytes, bytearray)):
         pil_image = Image.open(io.BytesIO(img))
     elif isinstance(img, Path):
+        is_path = True
         try:
             pil_image = Image.open(img)
             pil_image.load()
@@ -109,7 +111,7 @@ def input_to_pil_image(img):
-            return None
+            return None, False  # keep the (image, is_path) shape: every caller unpacks two values
     else:
         raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
-    return pil_image
+    return pil_image, is_path
 
 
 def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
@@ -174,13 +176,14 @@ class MangaOcr:
             logger.info('Manga OCR ready')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
         x = (True, self.model(img))
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
 class GoogleVision:
@@ -204,7 +207,7 @@ class GoogleVision:
                 logger.warning('Error parsing Google credentials, Google Vision will not work!')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -220,7 +223,8 @@ class GoogleVision:
         res = texts[0].description if len(texts) > 0 else ''
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):
@@ -240,7 +244,7 @@ class GoogleLens:
             logger.info('Google Lens ready')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -310,7 +314,8 @@ class GoogleLens:
 
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):
@@ -318,9 +323,7 @@ class GoogleLens:
             aspect_ratio = img.width / img.height
             new_w = int(sqrt(3000000 * aspect_ratio))
             new_h = int(new_w / aspect_ratio)
-            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-            img.close()
-            img = img_resized
+            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
 
         return (pil_image_to_bytes(img), img.width, img.height)
 
@@ -339,7 +342,7 @@ class GoogleLensWeb:
             logger.info('Google Lens (web) ready')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -406,7 +409,8 @@ class GoogleLensWeb:
 
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):
@@ -414,9 +418,7 @@ class GoogleLensWeb:
             aspect_ratio = img.width / img.height
             new_w = int(sqrt(3000000 * aspect_ratio))
             new_h = int(new_w / aspect_ratio)
-            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-            img.close()
-            img = img_resized
+            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
 
         return pil_image_to_bytes(img)
 
@@ -432,7 +434,7 @@ class Bing:
         logger.info('Bing ready')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -529,7 +531,8 @@ class Bing:
         
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):
@@ -541,9 +544,7 @@ class Bing:
             resize_factor = max(max_pixel_size / img.width, max_pixel_size / img.height)
             new_w = int(img.width * resize_factor)
             new_h = int(img.height * resize_factor)
-            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-            img.close()
-            img = img_resized
+            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
 
         img_bytes, _ = limit_image_size(img, max_byte_size)
 
@@ -568,7 +569,7 @@ class AppleVision:
             logger.info('Apple Vision ready')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -593,7 +594,8 @@ class AppleVision:
             else:
                 x = (False, 'Unknown error!')
 
-            img.close()
+            if is_path:
+                img.close()
             return x
 
     def _preprocess(self, img):
@@ -647,7 +649,7 @@ class AppleLiveText:
             logger.info('Apple Live Text ready')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -704,7 +706,7 @@ class WinRTOCR:
                 logger.warning('Error reading URL from config, WinRT OCR will not work!')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -726,7 +728,8 @@ class WinRTOCR:
 
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):
@@ -761,7 +764,7 @@ class OneOCR:
                 logger.warning('Error reading URL from config, OneOCR will not work!')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -785,7 +788,8 @@ class OneOCR:
 
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):
@@ -810,7 +814,7 @@ class AzureImageAnalysis:
                 logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -831,7 +835,8 @@ class AzureImageAnalysis:
 
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):
@@ -839,9 +844,7 @@ class AzureImageAnalysis:
             resize_factor = max(50 / img.width, 50 / img.height)
             new_w = int(img.width * resize_factor)
             new_h = int(img.height * resize_factor)
-            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-            img.close()
-            img = img_resized
+            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
 
         return pil_image_to_bytes(img)
 
@@ -862,7 +865,7 @@ class EasyOCR:
             logger.info('EasyOCR ready')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -873,7 +876,8 @@ class EasyOCR:
 
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):
@@ -908,7 +912,7 @@ class RapidOCR:
             logger.info('RapidOCR ready')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -920,7 +924,8 @@ class RapidOCR:
 
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):
@@ -942,7 +947,7 @@ class OCRSpace:
             logger.warning('Error reading API key from config, OCRSpace will not work!')
 
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
 
@@ -976,7 +981,8 @@ class OCRSpace:
         res = res['ParsedResults'][0]['ParsedText']
         x = (True, res)
 
-        img.close()
+        if is_path:
+            img.close()
         return x
 
     def _preprocess(self, img):       
diff --git a/owocr/run.py b/owocr/run.py
index 5c3a2ed..ca59492 100644
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -364,13 +364,15 @@ class TextFiltering:
         return text, orig_text_filtered
 
 
-class ScreenshotClass:
-    def __init__(self):
+class ScreenshotThread(threading.Thread):
+    def __init__(self, screen_capture_on_combo):
+        super().__init__(daemon=True)
         screen_capture_area = config.get_general('screen_capture_area')
         self.macos_window_tracker_instance = None
         self.windows_window_tracker_instance = None
         self.screencapture_window_active = True
         self.screencapture_window_visible = True
+        self.use_periodic_queue = not screen_capture_on_combo
         if screen_capture_area == '':
             self.screencapture_mode = 0
         elif screen_capture_area.startswith('screen_'):
@@ -385,10 +387,10 @@ class ScreenshotClass:
             self.screencapture_mode = 2
 
         if self.screencapture_mode != 2:
-            self.sct = mss.mss()
+            sct = mss.mss()
 
             if self.screencapture_mode == 1:
-                mon = self.sct.monitors
+                mon = sct.monitors
                 if len(mon) <= screen_capture_monitor:
                     raise ValueError('Invalid monitor number in screen_capture_area')
                 coord_left = mon[screen_capture_monitor]['left']
@@ -468,12 +470,6 @@ class ScreenshotClass:
             else:
                 raise ValueError('Window capture is only currently supported on Windows and macOS')
 
-    def __del__(self):
-        if self.macos_window_tracker_instance:
-            self.macos_window_tracker_instance.join()
-        elif self.windows_window_tracker_instance:
-            self.windows_window_tracker_instance.join()
-
     def get_windows_window_handle(self, window_title):
         def callback(hwnd, window_title_part):
             window_title = win32gui.GetWindowText(hwnd)
@@ -580,67 +576,84 @@ class ScreenshotClass:
         if not found:
             on_window_closed(False)
 
-    def __call__(self):
-        if self.screencapture_mode == 2:
-            if sys.platform == 'darwin':
-                with objc.autorelease_pool():
-                    if self.old_macos_screenshot_api:
-                        cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id], kCGWindowImageBoundsIgnoreFraming)
-                    else:
-                        self.capture_macos_window_screenshot(self.window_id)
-                        try:
-                            cg_image = self.screencapturekit_queue.get(timeout=0.5)
-                        except queue.Empty:
-                            cg_image = None
-                    if not cg_image:
-                        return 0
-                    width = CGImageGetWidth(cg_image)
-                    height = CGImageGetHeight(cg_image)
-                    raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
-                    bpr = CGImageGetBytesPerRow(cg_image)
-                img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
-            else:
-                try:
-                    coord_left, coord_top, right, bottom = win32gui.GetWindowRect(self.window_handle)
-                    coord_width = right - coord_left
-                    coord_height = bottom - coord_top
-
-                    hwnd_dc = win32gui.GetWindowDC(self.window_handle)
-                    mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
-                    save_dc = mfc_dc.CreateCompatibleDC()
-
-                    save_bitmap = win32ui.CreateBitmap()
-                    save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
-                    save_dc.SelectObject(save_bitmap)
-
-                    result = ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2)
-
-                    bmpinfo = save_bitmap.GetInfo()
-                    bmpstr = save_bitmap.GetBitmapBits(True)
-                except pywintypes.error:
-                    return 0
-                img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
-                try:
-                    win32gui.DeleteObject(save_bitmap.GetHandle())
-                except:
-                    pass
-                try:
-                    save_dc.DeleteDC()
-                except:
-                    pass
-                try:
-                    mfc_dc.DeleteDC()
-                except:
-                    pass
-                try:
-                    win32gui.ReleaseDC(self.window_handle, hwnd_dc)
-                except:
-                    pass                    
+    def write_result(self, result):
+        if self.use_periodic_queue:
+            periodic_screenshot_queue.put(result)
         else:
-            sct_img = self.sct.grab(self.sct_params)
-            img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
+            image_queue.put((result, True))
 
-        return img
+    def run(self):  # thread body: take one capture per screenshot_event signal and publish it via write_result
+        if self.screencapture_mode != 2:
+            sct = mss.mss()  # created here so the instance is used by the same thread that made it
+        while screenshot_event.wait() and not terminated:
+            if self.screencapture_mode == 2:
+                if sys.platform == 'darwin':
+                    with objc.autorelease_pool():
+                        if self.old_macos_screenshot_api:
+                            cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id], kCGWindowImageBoundsIgnoreFraming)
+                        else:
+                            self.capture_macos_window_screenshot(self.window_id)
+                            try:
+                                cg_image = self.screencapturekit_queue.get(timeout=0.5)
+                            except queue.Empty:
+                                cg_image = None
+                        if not cg_image:
+                            self.write_result(0)  # 0 is published in place of an image when the capture failed
+                            break
+                        width = CGImageGetWidth(cg_image)
+                        height = CGImageGetHeight(cg_image)
+                        raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
+                        bpr = CGImageGetBytesPerRow(cg_image)
+                    img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
+                else:
+                    try:
+                        coord_left, coord_top, right, bottom = win32gui.GetWindowRect(self.window_handle)
+                        coord_width = right - coord_left
+                        coord_height = bottom - coord_top
+
+                        hwnd_dc = win32gui.GetWindowDC(self.window_handle)
+                        mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
+                        save_dc = mfc_dc.CreateCompatibleDC()
+
+                        save_bitmap = win32ui.CreateBitmap()
+                        save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
+                        save_dc.SelectObject(save_bitmap)
+
+                        result = ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2)
+
+                        bmpinfo = save_bitmap.GetInfo()
+                        bmpstr = save_bitmap.GetBitmapBits(True)
+                    except pywintypes.error:
+                        self.write_result(0)  # 0 is published in place of an image when the capture failed
+                        break
+                    img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
+                    try:  # best-effort GDI cleanup: each handle is released independently of the others
+                        win32gui.DeleteObject(save_bitmap.GetHandle())
+                    except Exception:
+                        pass
+                    try:
+                        save_dc.DeleteDC()
+                    except Exception:
+                        pass
+                    try:
+                        mfc_dc.DeleteDC()
+                    except Exception:
+                        pass
+                    try:
+                        win32gui.ReleaseDC(self.window_handle, hwnd_dc)
+                    except Exception:
+                        pass
+            else:
+                sct_img = sct.grab(self.sct_params)
+                img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
+
+            screenshot_event.clear()  # clear BEFORE publishing so a request set() right after the frame is received is not lost
+            self.write_result(img)
+
+        if self.macos_window_tracker_instance:
+            self.macos_window_tracker_instance.join()
+        elif self.windows_window_tracker_instance:
+            self.windows_window_tracker_instance.join()
 
 
 class AutopauseTimer:
@@ -758,8 +771,7 @@ def on_window_closed(alive):
 
 def on_screenshot_combo():
     if not paused:
-        img = take_screenshot()
-        image_queue.put((img, True))
+        screenshot_event.set()
 
 
 def process_and_write_results(img_or_path, last_result, filtering, notify):
@@ -880,7 +892,8 @@ def run():
         websocket_server_thread = WebsocketServerThread('websocket' in (read_from, read_from_secondary))
         websocket_server_thread.start()
     if 'screencapture' in (read_from, read_from_secondary):
-        global take_screenshot
+        global screenshot_thread
+        global screenshot_event
         screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
         screen_capture_combo = config.get_general('screen_capture_combo')
         last_screenshot_time = 0
@@ -888,7 +901,12 @@ def run():
         if screen_capture_combo != '':
             screen_capture_on_combo = True
             key_combos[screen_capture_combo] = on_screenshot_combo
-        take_screenshot = ScreenshotClass()
+        else:
+            global periodic_screenshot_queue
+            periodic_screenshot_queue = queue.Queue()
+        screenshot_event = threading.Event()
+        screenshot_thread = ScreenshotThread(screen_capture_on_combo)
+        screenshot_thread.start()
         filtering = TextFiltering()
         read_from_readable.append('screen capture')
     if 'websocket' in (read_from, read_from_secondary):
@@ -951,8 +969,9 @@ def run():
                 pass
 
         if (not img) and process_screenshots:
-            if (not paused) and take_screenshot.screencapture_window_active and take_screenshot.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
-                img = take_screenshot()
+            if (not paused) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
+                screenshot_event.set()
+                img = periodic_screenshot_queue.get()
                 filter_img = True
                 notify = False
                 last_screenshot_time = time.time()
diff --git a/pyproject.toml b/pyproject.toml
index ef822e9..f718c3b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "owocr"
-version = "1.14"
+version = "1.14.1"
 description = "Japanese OCR"
 readme = "README.md"
 requires-python = ">=3.11"