From 68626c44a2885a58aa67f29105f8705611d58373 Mon Sep 17 00:00:00 2001 From: AuroraWright Date: Sun, 12 Oct 2025 22:26:45 +0200 Subject: [PATCH] Make second pass multithreaded and refactor coordinate picker to be permanent if needed --- owocr/ocr.py | 13 ++ owocr/run.py | 103 +++++++++--- owocr/screen_coordinate_picker.py | 250 ++++++++++++++++-------------- 3 files changed, 231 insertions(+), 135 deletions(-) diff --git a/owocr/ocr.py b/owocr/ocr.py index 71be724..286ddba 100644 --- a/owocr/ocr.py +++ b/owocr/ocr.py @@ -200,6 +200,7 @@ class MangaOcr: local = True manual_language = False coordinate_support = False + threading_support = True def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}): if 'manga_ocr' not in sys.modules: @@ -233,6 +234,7 @@ class GoogleVision: local = False manual_language = False coordinate_support = False + threading_support = True def __init__(self): if 'google.cloud' not in sys.modules: @@ -280,6 +282,7 @@ class GoogleLens: local = False manual_language = False coordinate_support = True + threading_support = True def __init__(self): if 'betterproto' not in sys.modules: @@ -427,6 +430,7 @@ class GoogleLensWeb: local = False manual_language = False coordinate_support = False + threading_support = True def __init__(self): if 'pyjson5' not in sys.modules: @@ -524,6 +528,7 @@ class Bing: local = False manual_language = False coordinate_support = True + threading_support = True def __init__(self): self.requests_session = requests.Session() @@ -704,6 +709,7 @@ class AppleVision: local = True manual_language = True coordinate_support = False + threading_support = True def __init__(self, language='ja'): if sys.platform != 'darwin': @@ -756,6 +762,7 @@ class AppleLiveText: local = True manual_language = True coordinate_support = True + threading_support = False def __init__(self, language='ja'): if sys.platform != 'darwin': @@ -897,6 +904,7 @@ class WinRTOCR: local = True manual_language = True coordinate_support = False + threading_support = True def __init__(self, config={}, language='ja'): if sys.platform == 'win32': @@ -955,6 +963,7 @@ class OneOCR: local = True manual_language = False coordinate_support = True + threading_support = True def __init__(self, config={}): if sys.platform == 'win32': @@ -1079,6 +1088,7 @@ class AzureImageAnalysis: local = False manual_language = False coordinate_support = False + threading_support = True def __init__(self, config={}): if 'azure.ai.vision.imageanalysis' not in sys.modules: @@ -1135,6 +1145,7 @@ class EasyOCR: local = True manual_language = True coordinate_support = False + threading_support = True def __init__(self, config={'gpu': True}, language='ja'): if 'easyocr' not in sys.modules: @@ -1173,6 +1184,7 @@ class RapidOCR: local = True manual_language = True coordinate_support = False + threading_support = True def __init__(self, config={'high_accuracy_detection': False, 'high_accuracy_recognition': True}, language='ja'): if 'rapidocr' not in sys.modules: @@ -1238,6 +1250,7 @@ class OCRSpace: local = False manual_language = True coordinate_support = False + threading_support = True def __init__(self, config={}, language='ja'): try: diff --git a/owocr/run.py b/owocr/run.py index c3511a9..388f033 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -783,6 +783,7 @@ class ScreenshotThread(threading.Thread): def __init__(self): super().__init__(daemon=True) screen_capture_area = config.get_general('screen_capture_area') + self.coordinate_selector_combo_enabled = config.get_general('coordinate_selector_combo') != '' self.is_combo_screenshot = False self.macos_window_tracker_instance = None self.windows_window_tracker_instance = None @@ -801,6 +802,9 @@ class ScreenshotThread(threading.Thread): else: self.screencapture_mode = 2 + if self.coordinate_selector_combo_enabled: + self.launch_coordinate_picker(True, False) + if self.screencapture_mode != 2: self.sct = mss.mss() @@ -815,7 +819,7 @@ class ScreenshotThread(threading.Thread): elif self.screencapture_mode == 3: coord_left, coord_top, coord_width, coord_height = [int(c.strip()) for c in screen_capture_area.split(',')] else: - self.launch_coordinate_picker(True) + self.launch_coordinate_picker(False, True) if self.screencapture_mode != 0: self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height} @@ -881,7 +885,7 @@ class ScreenshotThread(threading.Thread): logger.opt(ansi=True).info(f'Selected window coordinates: {x},{y},{x2},{y2}') self.window_area_coordinates = (img.size, (x, y, x2, y2)) elif screen_capture_window_area == '': - self.launch_coordinate_picker(True) + self.launch_coordinate_picker(False, False) else: raise ValueError('"screen_capture_window_area" must be empty, "window" for the whole window, or a valid set of coordinates') @@ -1066,10 +1070,14 @@ class ScreenshotThread(threading.Thread): else: periodic_screenshot_queue.put(result) - def launch_coordinate_picker(self, on_init): + def launch_coordinate_picker(self, init, must_return): + if init: + logger.opt(ansi=True).info('Preloading screen coordinate picker') + get_screen_selection(True, True) + return if self.screencapture_mode != 2: logger.opt(ansi=True).info('Launching screen coordinate picker') - screen_selection = get_screen_selection() + screen_selection = get_screen_selection(None, self.coordinate_selector_combo_enabled) if not screen_selection: if on_init: raise ValueError('Picker window was closed or an error occurred') @@ -1093,7 +1101,7 @@ class ScreenshotThread(threading.Thread): self.window_area_coordinates = None img = self.take_screenshot() logger.opt(ansi=True).info('Launching window coordinate picker') - window_selection = get_screen_selection(img) + window_selection = get_screen_selection(img, self.coordinate_selector_combo_enabled) if not window_selection: logger.opt(ansi=True).warning('Picker window was closed or an error occurred, selecting whole window') else: @@ -1112,7 +1120,7 @@ class ScreenshotThread(threading.Thread): while not terminated: if not screenshot_event.wait(timeout=0.1): if coordinate_selector_event.is_set(): - self.launch_coordinate_picker(False) + self.launch_coordinate_picker(False, False) coordinate_selector_event.clear() continue @@ -1130,33 +1138,77 @@ class ScreenshotThread(threading.Thread): self.windows_window_tracker_instance.join() +class SecondPassThread: + def __init__(self): + self.input_queue = queue.Queue() + self.output_queue = queue.Queue() + self.ocr_thread = None + self.running = False + + def __del__(self): + self.stop() + + def start(self): + if self.ocr_thread is None or not self.ocr_thread.is_alive(): + self.running = True + self.ocr_thread = threading.Thread(target=self._process_ocr, daemon=True) + self.ocr_thread.start() + + def stop(self): + self.running = False + if self.ocr_thread and self.ocr_thread.is_alive(): + self.ocr_thread.join() + + def _process_ocr(self): + while self.running and not terminated: + try: + img, engine_instance = self.input_queue.get(timeout=0.1) + + start_time = time.time() + res, result_data = engine_instance(img) + end_time = time.time() + + self.output_queue.put((res, result_data, end_time - start_time)) + except queue.Empty: + continue + + def submit_task(self, img, engine_instance): + self.input_queue.put((img, engine_instance)) + + def get_result(self): + try: + return self.output_queue.get_nowait() + except queue.Empty: + return None + + class AutopauseTimer: def __init__(self, timeout): - self.stop_event = threading.Event() self.timeout = timeout self.timer_thread = None + self.running = False def __del__(self): self.stop() def start(self): self.stop() - self.stop_event.clear() + self.running = True self.timer_thread = threading.Thread(target=self._countdown) self.timer_thread.start() def stop(self): - if not self.stop_event.is_set() and self.timer_thread and self.timer_thread.is_alive(): - self.stop_event.set() + if self.running and self.timer_thread and self.timer_thread.is_alive(): + self.running = False self.timer_thread.join() def _countdown(self): seconds = self.timeout - while seconds > 0 and not self.stop_event.is_set() and not terminated: + while seconds > 0 and self.running and not terminated: time.sleep(1) seconds -= 1 - if not self.stop_event.is_set(): - self.stop_event.set() + if self.running: + self.running = False if not (paused or terminated): pause_handler(True) @@ -1164,6 +1216,10 @@ class AutopauseTimer: class OutputResult: def __init__(self): self.filtering = TextFiltering() + self.second_pass_thread = SecondPassThread() + + def __del__(self): + self.second_pass_thread.stop() def _post_process(self, text, strip_spaces): is_cj_text = self.filtering.cj_regex.search(''.join(text)) @@ -1195,6 +1251,7 @@ class OutputResult: two_pass_processing_active = False if filter_text and engine_index_2 != -1 and engine_index_2 != engine_index: + self.second_pass_thread.start() engine_instance_2 = engine_instances[engine_index_2] start_time = time.time() res2, result_data_2 = engine_instance_2(img_or_path) @@ -1212,12 +1269,22 @@ class OutputResult: if output_format != 'json': if changed_regions_image: img_or_path = changed_regions_image - else: - return - start_time = time.time() - res, result_data = engine_instance(img_or_path) - end_time = time.time() + if engine_instance.threading_support: + self.second_pass_thread.submit_task(img_or_path, engine_instance) + else: + self.second_pass_thread.stop() + + second_pass_result = self.second_pass_thread.get_result() + if second_pass_result: + res, result_data, processing_time = second_pass_result + two_pass_processing_active = True + elif two_pass_processing_active and engine_instance.threading_support: + return + else: + start_time = time.time() + res, result_data = engine_instance(img_or_path) + end_time = time.time() if not res: logger.opt(ansi=True).warning(f'<{engine_color}>{engine_instance.readable_name} reported an error after {end_time - start_time:0.03f}s: {result_data}') diff --git a/owocr/screen_coordinate_picker.py b/owocr/screen_coordinate_picker.py index f5c7c8d..5641820 100644 --- a/owocr/screen_coordinate_picker.py +++ b/owocr/screen_coordinate_picker.py @@ -1,6 +1,12 @@ -from multiprocessing import Process, Manager +import multiprocessing +import queue import mss from PIL import Image +import sys +try: + from AppKit import NSApplication, NSApplicationActivationPolicyAccessory +except ImportError: + pass try: from PIL import ImageTk @@ -11,105 +17,20 @@ except: class ScreenSelector: - def __init__(self, result, input_image=None): + def __init__(self, result_queue, command_queue): self.sct = mss.mss() self.monitors = self.sct.monitors[1:] self.root = None - self.result = result - self.input_image = input_image + self.result_queue = result_queue + self.command_queue = command_queue + self.mac_init_done = False def on_select(self, monitor, coordinates): - self.result['monitor'] = monitor - self.result['coordinates'] = coordinates - self.root.destroy() + self.result_queue.put({'monitor': monitor, 'coordinates': coordinates}) + if self.root: + self.root.destroy() - def create_window_from_image(self, img): - original_width, original_height = img.size - display_monitor = None - - for monitor in self.monitors: - if (monitor['width'] >= original_width and - monitor['height'] >= original_height): - display_monitor = monitor - break - - if not display_monitor: - display_monitor = self.monitors[0] - - window_width = min(original_width, display_monitor['width']) - window_height = min(original_height, display_monitor['height']) - left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2 - top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2 - - window = tk.Toplevel(self.root) - window.geometry(f"{window_width}x{window_height}+{left}+{top}") - window.overrideredirect(1) - window.attributes('-topmost', 1) - - # Resize image if it's larger than the window - if img.width > window_width or img.height > window_height: - img = img.resize((window_width, window_height), Image.Resampling.LANCZOS) - scale_x = original_width / window_width - scale_y = original_height / window_height - else: - scale_x = 1 - scale_y = 1 - - img_tk = ImageTk.PhotoImage(img) - - canvas = tk.Canvas(window, cursor='cross', highlightthickness=0) - canvas.pack(fill=tk.BOTH, expand=True) - canvas.image = img_tk - canvas.create_image(0, 0, image=img_tk, anchor=tk.NW) - - start_x, start_y, rect = None, None, None - - def on_click(event): - nonlocal start_x, start_y, rect - start_x, start_y = event.x, event.y - rect = canvas.create_rectangle(start_x, start_y, start_x, start_y, outline='red') - - def on_drag(event): - nonlocal rect, start_x, start_y - if rect: - canvas.coords(rect, start_x, start_y, event.x, event.y) - - def on_release(event): - nonlocal start_x, start_y, scale_x, scale_y - end_x, end_y = event.x, event.y - - x1 = min(start_x, end_x) - y1 = min(start_y, end_y) - x2 = max(start_x, end_x) - y2 = max(start_y, end_y) - - x1 = int(x1 * scale_x) - y1 = int(y1 * scale_y) - x2 = int(x2 * scale_x) - y2 = int(y2 * scale_y) - - # Return None for monitor when using input image - self.on_select(None, (x1, y1, x2 - x1, y2 - y1)) - - canvas.bind('', on_click) - canvas.bind('', on_drag) - canvas.bind('', on_release) - - def create_window(self, monitor): - screenshot = self.sct.grab(monitor) - img = Image.frombytes('RGB', screenshot.size, screenshot.rgb) - - if img.width != monitor['width']: - img = img.resize((monitor['width'], monitor['height']), Image.Resampling.LANCZOS) - - window = tk.Toplevel(self.root) - window.geometry(f"{monitor['width']}x{monitor['height']}+{monitor['left']}+{monitor['top']}") - window.overrideredirect(1) - window.attributes('-topmost', 1) - - img_tk = ImageTk.PhotoImage(img) - - canvas = tk.Canvas(window, cursor='cross', highlightthickness=0) + def _setup_selection_canvas(self, canvas, img_tk, scale_x=1, scale_y=1, monitor=None): canvas.pack(fill=tk.BOTH, expand=True) canvas.image = img_tk canvas.create_image(0, 0, image=img_tk, anchor=tk.NW) @@ -133,7 +54,12 @@ class ScreenSelector: x1 = min(start_x, end_x) y1 = min(start_y, end_y) x2 = max(start_x, end_x) - y2 = max(start_y, end_y) + y2 = max(start_y, end_y) + + x1 = int(x1 * scale_x) + y1 = int(y1 * scale_y) + x2 = int(x2 * scale_x) + y2 = int(y2 * scale_y) self.on_select(monitor, (x1, y1, x2 - x1, y2 - y1)) @@ -141,37 +67,127 @@ class ScreenSelector: canvas.bind('', on_drag) canvas.bind('', on_release) - def start(self): - self.root = tk.Tk() - self.root.withdraw() + def _create_selection_window(self, img, geometry, scale_x=1, scale_y=1, monitor=None): + window = tk.Toplevel(self.root) + window.geometry(geometry) + window.overrideredirect(1) + window.attributes('-topmost', 1) - if self.input_image: - self.create_window_from_image(self.input_image) + img_tk = ImageTk.PhotoImage(img) + canvas = tk.Canvas(window, cursor='cross', highlightthickness=0) + + self._setup_selection_canvas(canvas, img_tk, scale_x, scale_y, monitor) + + def create_window_from_image(self, img): + original_width, original_height = img.size + display_monitor = None + + for monitor in self.monitors: + if (monitor['width'] >= original_width and + monitor['height'] >= original_height): + display_monitor = monitor + break + + if not display_monitor: + display_monitor = self.monitors[0] + + window_width = min(original_width, display_monitor['width']) + window_height = min(original_height, display_monitor['height']) + left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2 + top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2 + + geometry = f"{window_width}x{window_height}+{left}+{top}" + + if img.width > window_width or img.height > window_height: + img = img.resize((window_width, window_height), Image.Resampling.LANCZOS) + scale_x = original_width / window_width + scale_y = original_height / window_height else: - for monitor in self.monitors: - self.create_window(monitor) + scale_x = 1 + scale_y = 1 - self.root.mainloop() - self.root.update() + self._create_selection_window(img, geometry, scale_x, scale_y, None) + + def create_window(self, monitor): + screenshot = self.sct.grab(monitor) + img = Image.frombytes('RGB', screenshot.size, screenshot.rgb) + original_width, original_height = img.size + + geometry = f"{monitor['width']}x{monitor['height']}+{monitor['left']}+{monitor['top']}" + + if img.width != monitor['width']: + img = img.resize((monitor['width'], monitor['height']), Image.Resampling.LANCZOS) + scale_x = original_width / monitor['width'] + scale_y = original_height / monitor['height'] + else: + scale_x = 1 + scale_y = 1 + + self._create_selection_window(img, geometry, scale_x, scale_y, monitor) + + def start(self): + while True: + try: + image = self.command_queue.get(timeout=0.1) + except queue.Empty: + continue + + if image == False: + break + if image == True: + self.on_select(None, None) + continue + + self.root = tk.Tk() + + if not self.mac_init_done and sys.platform == 'darwin': + app = NSApplication.sharedApplication() + app.setActivationPolicy_(NSApplicationActivationPolicyAccessory) + self.mac_init_done = True + + self.root.withdraw() + + if image: + self.create_window_from_image(image) + else: + for monitor in self.monitors: + self.create_window(monitor) + + self.root.mainloop() + self.root.update() + self.root = None -def run_screen_selector(result, input_image=None): - selector = ScreenSelector(result, input_image) +def run_screen_selector(result_queue, command_queue): + selector = ScreenSelector(result_queue, command_queue) selector.start() +selector_process = None +result_queue = None +command_queue = None + +def get_screen_selection(pil_image, permanent_process): + global selector_process, result_queue, command_queue -def get_screen_selection(pil_image = None): if not selector_available: raise ValueError('tkinter or PIL with tkinter support are not installed, unable to open picker') - with Manager() as manager: - res = manager.dict() - process = Process(target=run_screen_selector, args=(res, pil_image)) - - process.start() - process.join() + if selector_process is None or not selector_process.is_alive(): + result_queue = multiprocessing.Queue() + command_queue = multiprocessing.Queue() + selector_process = multiprocessing.Process(target=run_screen_selector, args=(result_queue, command_queue)) + selector_process.daemon = True + selector_process.start() - if 'monitor' in res and 'coordinates' in res: - return res.copy() - else: - return False + command_queue.put(pil_image) + + result = False + while (not result) and selector_process.is_alive(): + try: + result = result_queue.get(timeout=0.1) # 60 second timeout + except: + continue + if not permanent_process: + command_queue.put(False) + selector_process.join() + return result