From 2cb828d1dafb24c61ab9ae7c2a0c1ce8f5a8089e Mon Sep 17 00:00:00 2001 From: AuroraWright Date: Mon, 16 Jun 2025 16:41:08 +0200 Subject: [PATCH] Fix Linux crashes --- owocr/ocr.py | 82 +++++++++++++----------- owocr/run.py | 169 +++++++++++++++++++++++++++---------------------- pyproject.toml | 2 +- 3 files changed, 139 insertions(+), 114 deletions(-) diff --git a/owocr/ocr.py b/owocr/ocr.py index fc9532e..b99a650 100644 --- a/owocr/ocr.py +++ b/owocr/ocr.py @@ -97,11 +97,13 @@ def post_process(text): def input_to_pil_image(img): + is_path = False if isinstance(img, Image.Image): pil_image = img elif isinstance(img, (bytes, bytearray)): pil_image = Image.open(io.BytesIO(img)) elif isinstance(img, Path): + is_path = True try: pil_image = Image.open(img) pil_image.load() @@ -109,7 +111,7 @@ def input_to_pil_image(img): return None else: raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}') - return pil_image + return pil_image, is_path def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False): @@ -174,13 +176,14 @@ class MangaOcr: logger.info('Manga OCR ready') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') x = (True, self.model(img)) - img.close() + if is_path: + img.close() return x class GoogleVision: @@ -204,7 +207,7 @@ class GoogleVision: logger.warning('Error parsing Google credentials, Google Vision will not work!') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -220,7 +223,8 @@ class GoogleVision: res = texts[0].description if len(texts) > 0 else '' x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -240,7 +244,7 @@ class GoogleLens: logger.info('Google Lens ready') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -310,7 +314,8 @@ class GoogleLens: x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -318,9 +323,7 @@ class GoogleLens: aspect_ratio = img.width / img.height new_w = int(sqrt(3000000 * aspect_ratio)) new_h = int(new_w / aspect_ratio) - img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) - img.close() - img = img_resized + img = img.resize((new_w, new_h), Image.Resampling.LANCZOS) return (pil_image_to_bytes(img), img.width, img.height) @@ -339,7 +342,7 @@ class GoogleLensWeb: logger.info('Google Lens (web) ready') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -406,7 +409,8 @@ class GoogleLensWeb: x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -414,9 +418,7 @@ class GoogleLensWeb: aspect_ratio = img.width / img.height new_w = int(sqrt(3000000 * aspect_ratio)) new_h = int(new_w / aspect_ratio) - img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) - img.close() - img = img_resized + img = img.resize((new_w, new_h), Image.Resampling.LANCZOS) return pil_image_to_bytes(img) @@ -432,7 +434,7 @@ class Bing: logger.info('Bing ready') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -529,7 +531,8 @@ class Bing: x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -541,9 +544,7 @@ class Bing: resize_factor = max(max_pixel_size / img.width, max_pixel_size / img.height) new_w = int(img.width * resize_factor) new_h = int(img.height * resize_factor) - img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) - img.close() - img = img_resized + img = img.resize((new_w, new_h), Image.Resampling.LANCZOS) img_bytes, _ = limit_image_size(img, max_byte_size) @@ -568,7 +569,7 @@ class AppleVision: logger.info('Apple Vision ready') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -593,7 +594,8 @@ class AppleVision: else: x = (False, 'Unknown error!') - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -647,7 +649,7 @@ class AppleLiveText: logger.info('Apple Live Text ready') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -704,7 +706,7 @@ class WinRTOCR: logger.warning('Error reading URL from config, WinRT OCR will not work!') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -726,7 +728,8 @@ class WinRTOCR: x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -761,7 +764,7 @@ class OneOCR: logger.warning('Error reading URL from config, OneOCR will not work!') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -785,7 +788,8 @@ class OneOCR: x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -810,7 +814,7 @@ class AzureImageAnalysis: logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -831,7 +835,8 @@ class AzureImageAnalysis: x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -839,9 +844,7 @@ class AzureImageAnalysis: resize_factor = max(50 / img.width, 50 / img.height) new_w = int(img.width * resize_factor) new_h = int(img.height * resize_factor) - img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) - img.close() - img = img_resized + img = img.resize((new_w, new_h), Image.Resampling.LANCZOS) return pil_image_to_bytes(img) @@ -862,7 +865,7 @@ class EasyOCR: logger.info('EasyOCR ready') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -873,7 +876,8 @@ class EasyOCR: x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -908,7 +912,7 @@ class RapidOCR: logger.info('RapidOCR ready') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -920,7 +924,8 @@ class RapidOCR: x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): @@ -942,7 +947,7 @@ class OCRSpace: logger.warning('Error reading API key from config, OCRSpace will not work!') def __call__(self, img): - img = input_to_pil_image(img) + img, is_path = input_to_pil_image(img) if not img: return (False, 'Invalid image provided') @@ -976,7 +981,8 @@ class OCRSpace: res = res['ParsedResults'][0]['ParsedText'] x = (True, res) - img.close() + if is_path: + img.close() return x def _preprocess(self, img): diff --git a/owocr/run.py b/owocr/run.py index 5c3a2ed..ca59492 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -364,13 +364,15 @@ class TextFiltering: return text, orig_text_filtered -class ScreenshotClass: - def __init__(self): +class ScreenshotThread(threading.Thread): + def __init__(self, screen_capture_on_combo): + super().__init__(daemon=True) screen_capture_area = config.get_general('screen_capture_area') self.macos_window_tracker_instance = None self.windows_window_tracker_instance = None self.screencapture_window_active = True self.screencapture_window_visible = True + self.use_periodic_queue = not screen_capture_on_combo if screen_capture_area == '': self.screencapture_mode = 0 elif screen_capture_area.startswith('screen_'): @@ -385,10 +387,10 @@ class ScreenshotClass: self.screencapture_mode = 2 if self.screencapture_mode != 2: - self.sct = mss.mss() + sct = mss.mss() if self.screencapture_mode == 1: - mon = self.sct.monitors + mon = sct.monitors if len(mon) <= screen_capture_monitor: raise ValueError('Invalid monitor number in screen_capture_area') coord_left = mon[screen_capture_monitor]['left'] @@ -468,12 +470,6 @@ class ScreenshotClass: else: raise ValueError('Window capture is only currently supported on Windows and macOS') - def __del__(self): - if self.macos_window_tracker_instance: - self.macos_window_tracker_instance.join() - elif self.windows_window_tracker_instance: - self.windows_window_tracker_instance.join() - def get_windows_window_handle(self, window_title): def callback(hwnd, window_title_part): window_title = win32gui.GetWindowText(hwnd) @@ -580,67 +576,84 @@ class ScreenshotClass: if not found: on_window_closed(False) - def __call__(self): - if self.screencapture_mode == 2: - if sys.platform == 'darwin': - with objc.autorelease_pool(): - if self.old_macos_screenshot_api: - cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id], kCGWindowImageBoundsIgnoreFraming) - else: - self.capture_macos_window_screenshot(self.window_id) - try: - cg_image = self.screencapturekit_queue.get(timeout=0.5) - except queue.Empty: - cg_image = None - if not cg_image: - return 0 - width = CGImageGetWidth(cg_image) - height = CGImageGetHeight(cg_image) - raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image)) - bpr = CGImageGetBytesPerRow(cg_image) - img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1) - else: - try: - coord_left, coord_top, right, bottom = win32gui.GetWindowRect(self.window_handle) - coord_width = right - coord_left - coord_height = bottom - coord_top - - hwnd_dc = win32gui.GetWindowDC(self.window_handle) - mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc) - save_dc = mfc_dc.CreateCompatibleDC() - - save_bitmap = win32ui.CreateBitmap() - save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height) - save_dc.SelectObject(save_bitmap) - - result = ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2) - - bmpinfo = save_bitmap.GetInfo() - bmpstr = save_bitmap.GetBitmapBits(True) - except pywintypes.error: - return 0 - img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1) - try: - win32gui.DeleteObject(save_bitmap.GetHandle()) - except: - pass - try: - save_dc.DeleteDC() - except: - pass - try: - mfc_dc.DeleteDC() - except: - pass - try: - win32gui.ReleaseDC(self.window_handle, hwnd_dc) - except: - pass + def write_result(self, result): + if self.use_periodic_queue: + periodic_screenshot_queue.put(result) else: - sct_img = self.sct.grab(self.sct_params) - img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX') + image_queue.put((result, True)) - return img + def run(self): + if self.screencapture_mode != 2: + sct = mss.mss() + while screenshot_event.wait() and not terminated: + if self.screencapture_mode == 2: + if sys.platform == 'darwin': + with objc.autorelease_pool(): + if self.old_macos_screenshot_api: + cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id], kCGWindowImageBoundsIgnoreFraming) + else: + self.capture_macos_window_screenshot(self.window_id) + try: + cg_image = self.screencapturekit_queue.get(timeout=0.5) + except queue.Empty: + cg_image = None + if not cg_image: + self.write_result(0) + break + width = CGImageGetWidth(cg_image) + height = CGImageGetHeight(cg_image) + raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image)) + bpr = CGImageGetBytesPerRow(cg_image) + img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1) + else: + try: + coord_left, coord_top, right, bottom = win32gui.GetWindowRect(self.window_handle) + coord_width = right - coord_left + coord_height = bottom - coord_top + + hwnd_dc = win32gui.GetWindowDC(self.window_handle) + mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc) + save_dc = mfc_dc.CreateCompatibleDC() + + save_bitmap = win32ui.CreateBitmap() + save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height) + save_dc.SelectObject(save_bitmap) + + result = ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2) + + bmpinfo = save_bitmap.GetInfo() + bmpstr = save_bitmap.GetBitmapBits(True) + except pywintypes.error: + self.write_result(0) + break + img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1) + try: + win32gui.DeleteObject(save_bitmap.GetHandle()) + except: + pass + try: + save_dc.DeleteDC() + except: + pass + try: + mfc_dc.DeleteDC() + except: + pass + try: + win32gui.ReleaseDC(self.window_handle, hwnd_dc) + except: + pass + else: + sct_img = sct.grab(self.sct_params) + img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX') + + self.write_result(img) + screenshot_event.clear() + + if self.macos_window_tracker_instance: + self.macos_window_tracker_instance.join() + elif self.windows_window_tracker_instance: + self.windows_window_tracker_instance.join() class AutopauseTimer: @@ -758,8 +771,7 @@ def on_window_closed(alive): def on_screenshot_combo(): if not paused: - img = take_screenshot() - image_queue.put((img, True)) + screenshot_event.set() def process_and_write_results(img_or_path, last_result, filtering, notify): @@ -880,7 +892,8 @@ def run(): websocket_server_thread = WebsocketServerThread('websocket' in (read_from, read_from_secondary)) websocket_server_thread.start() if 'screencapture' in (read_from, read_from_secondary): - global take_screenshot + global screenshot_thread + global screenshot_event screen_capture_delay_secs = config.get_general('screen_capture_delay_secs') screen_capture_combo = config.get_general('screen_capture_combo') last_screenshot_time = 0 @@ -888,7 +901,12 @@ def run(): if screen_capture_combo != '': screen_capture_on_combo = True key_combos[screen_capture_combo] = on_screenshot_combo - take_screenshot = ScreenshotClass() + else: + global periodic_screenshot_queue + periodic_screenshot_queue = queue.Queue() + screenshot_event = threading.Event() + screenshot_thread = ScreenshotThread(screen_capture_on_combo) + screenshot_thread.start() filtering = TextFiltering() read_from_readable.append('screen capture') if 'websocket' in (read_from, read_from_secondary): @@ -951,8 +969,9 @@ def run(): pass if (not img) and process_screenshots: - if (not paused) and take_screenshot.screencapture_window_active and take_screenshot.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs: - img = take_screenshot() + if (not paused) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs: + screenshot_event.set() + img = periodic_screenshot_queue.get() filter_img = True notify = False last_screenshot_time = time.time() diff --git a/pyproject.toml b/pyproject.toml index ef822e9..f718c3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "owocr" -version = "1.14" +version = "1.14.1" description = "Japanese OCR" readme = "README.md" requires-python = ">=3.11"