Make second pass multithreaded and refactor coordinate picker to be permanent if needed

2025-10-12 22:26:45 +02:00
parent 14066bdc95
commit 68626c44a2
3 changed files with 231 additions and 135 deletions
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -200,6 +200,7 @@ class MangaOcr:
    local = True
    manual_language = False
    coordinate_support = False
+    threading_support = True

    def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}):
        if 'manga_ocr' not in sys.modules:
@@ -233,6 +234,7 @@ class GoogleVision:
    local = False
    manual_language = False
    coordinate_support = False
+    threading_support = True

    def __init__(self):
        if 'google.cloud' not in sys.modules:
@@ -280,6 +282,7 @@ class GoogleLens:
    local = False
    manual_language = False
    coordinate_support = True
+    threading_support = True

    def __init__(self):
        if 'betterproto' not in sys.modules:
@@ -427,6 +430,7 @@ class GoogleLensWeb:
    local = False
    manual_language = False
    coordinate_support = False
+    threading_support = True

    def __init__(self):
        if 'pyjson5' not in sys.modules:
@@ -524,6 +528,7 @@ class Bing:
    local = False
    manual_language = False
    coordinate_support = True
+    threading_support = True

    def __init__(self):
        self.requests_session = requests.Session()
@@ -704,6 +709,7 @@ class AppleVision:
    local = True
    manual_language = True
    coordinate_support = False
+    threading_support = True

    def __init__(self, language='ja'):
        if sys.platform != 'darwin':
@@ -756,6 +762,7 @@ class AppleLiveText:
    local = True
    manual_language = True
    coordinate_support = True
+    threading_support = False

    def __init__(self, language='ja'):
        if sys.platform != 'darwin':
@@ -897,6 +904,7 @@ class WinRTOCR:
    local = True
    manual_language = True
    coordinate_support = False
+    threading_support = True

    def __init__(self, config={}, language='ja'):
        if sys.platform == 'win32':
@@ -955,6 +963,7 @@ class OneOCR:
    local = True
    manual_language = False
    coordinate_support = True
+    threading_support = True

    def __init__(self, config={}):
        if sys.platform == 'win32':
@@ -1079,6 +1088,7 @@ class AzureImageAnalysis:
    local = False
    manual_language = False
    coordinate_support = False
+    threading_support = True

    def __init__(self, config={}):
        if 'azure.ai.vision.imageanalysis' not in sys.modules:
@@ -1135,6 +1145,7 @@ class EasyOCR:
    local = True
    manual_language = True
    coordinate_support = False
+    threading_support = True

    def __init__(self, config={'gpu': True}, language='ja'):
        if 'easyocr' not in sys.modules:
@@ -1173,6 +1184,7 @@ class RapidOCR:
    local = True
    manual_language = True
    coordinate_support = False
+    threading_support = True

    def __init__(self, config={'high_accuracy_detection': False, 'high_accuracy_recognition': True}, language='ja'):
        if 'rapidocr' not in sys.modules:
@@ -1238,6 +1250,7 @@ class OCRSpace:
    local = False
    manual_language = True
    coordinate_support = False
+    threading_support = True

    def __init__(self, config={}, language='ja'):
        try:
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -783,6 +783,7 @@ class ScreenshotThread(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)
        screen_capture_area = config.get_general('screen_capture_area')
+        self.coordinate_selector_combo_enabled = config.get_general('coordinate_selector_combo') != ''
        self.is_combo_screenshot = False
        self.macos_window_tracker_instance = None
        self.windows_window_tracker_instance = None
@@ -801,6 +802,9 @@ class ScreenshotThread(threading.Thread):
        else:
            self.screencapture_mode = 2

+        if self.coordinate_selector_combo_enabled:
+            self.launch_coordinate_picker(True, False)
+
        if self.screencapture_mode != 2:
            self.sct = mss.mss()

@@ -815,7 +819,7 @@ class ScreenshotThread(threading.Thread):
            elif self.screencapture_mode == 3:
                coord_left, coord_top, coord_width, coord_height = [int(c.strip()) for c in screen_capture_area.split(',')]
            else:
-                self.launch_coordinate_picker(True)
+                self.launch_coordinate_picker(False, True)

            if self.screencapture_mode != 0:
                self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
@@ -881,7 +885,7 @@ class ScreenshotThread(threading.Thread):
                    logger.opt(ansi=True).info(f'Selected window coordinates: {x},{y},{x2},{y2}')
                    self.window_area_coordinates = (img.size, (x, y, x2, y2))
                elif screen_capture_window_area == '':
-                    self.launch_coordinate_picker(True)
+                    self.launch_coordinate_picker(False, False)
                else:
                    raise ValueError('"screen_capture_window_area" must be empty, "window" for the whole window, or a valid set of coordinates')

@@ -1066,10 +1070,14 @@ class ScreenshotThread(threading.Thread):
        else:
            periodic_screenshot_queue.put(result)

-    def launch_coordinate_picker(self, on_init):
+    def launch_coordinate_picker(self, init, must_return):
+        if init:
+            logger.opt(ansi=True).info('Preloading screen coordinate picker')
+            get_screen_selection(True, True)
+            return
        if self.screencapture_mode != 2:
            logger.opt(ansi=True).info('Launching screen coordinate picker')
-            screen_selection = get_screen_selection()
+            screen_selection = get_screen_selection(None, self.coordinate_selector_combo_enabled)
            if not screen_selection:
                if on_init:
                    raise ValueError('Picker window was closed or an error occurred')
@@ -1093,7 +1101,7 @@ class ScreenshotThread(threading.Thread):
            self.window_area_coordinates = None
            img = self.take_screenshot()
            logger.opt(ansi=True).info('Launching window coordinate picker')
-            window_selection = get_screen_selection(img)
+            window_selection = get_screen_selection(img, self.coordinate_selector_combo_enabled)
            if not window_selection:
                logger.opt(ansi=True).warning('Picker window was closed or an error occurred, selecting whole window')
            else:
@@ -1112,7 +1120,7 @@ class ScreenshotThread(threading.Thread):
        while not terminated:
            if not screenshot_event.wait(timeout=0.1):
                if coordinate_selector_event.is_set():
-                    self.launch_coordinate_picker(False)
+                    self.launch_coordinate_picker(False, False)
                    coordinate_selector_event.clear()
                continue

@@ -1130,33 +1138,77 @@ class ScreenshotThread(threading.Thread):
            self.windows_window_tracker_instance.join()


+class SecondPassThread:
+    def __init__(self):
+        self.input_queue = queue.Queue()
+        self.output_queue = queue.Queue()
+        self.ocr_thread = None
+        self.running = False
+
+    def __del__(self):
+        self.stop()
+
+    def start(self):
+        if self.ocr_thread is None or not self.ocr_thread.is_alive():
+            self.running = True
+            self.ocr_thread = threading.Thread(target=self._process_ocr, daemon=True)
+            self.ocr_thread.start()
+    
+    def stop(self):
+        self.running = False
+        if self.ocr_thread and self.ocr_thread.is_alive():
+            self.ocr_thread.join()
+    
+    def _process_ocr(self):
+        while self.running and not terminated:
+            try:
+                img, engine_instance = self.input_queue.get(timeout=0.1)
+
+                start_time = time.time()
+                res, result_data = engine_instance(img)
+                end_time = time.time()
+
+                self.output_queue.put((res, result_data, end_time - start_time))
+            except queue.Empty:
+                continue
+
+    def submit_task(self, img, engine_instance):
+        self.input_queue.put((img, engine_instance))
+    
+    def get_result(self):
+        try:
+            return self.output_queue.get_nowait()
+        except queue.Empty:
+            return None
+
+
 class AutopauseTimer:
    def __init__(self, timeout):
-        self.stop_event = threading.Event()
        self.timeout = timeout
        self.timer_thread = None
+        self.running = False

    def __del__(self):
        self.stop()

    def start(self):
        self.stop()
-        self.stop_event.clear()
+        self.running = True
        self.timer_thread = threading.Thread(target=self._countdown)
        self.timer_thread.start()

    def stop(self):
-        if not self.stop_event.is_set() and self.timer_thread and self.timer_thread.is_alive():
-            self.stop_event.set()
+        if self.running and self.timer_thread and self.timer_thread.is_alive():
+            self.running = False
            self.timer_thread.join()

    def _countdown(self):
        seconds = self.timeout
-        while seconds > 0 and not self.stop_event.is_set() and not terminated:
+        while seconds > 0 and self.running and not terminated:
            time.sleep(1)
            seconds -= 1
-        if not self.stop_event.is_set():
-            self.stop_event.set()
+        if self.running:
+            self.running = False
            if not (paused or terminated):
                pause_handler(True)

@@ -1164,6 +1216,10 @@ class AutopauseTimer:
 class OutputResult:
    def __init__(self):
        self.filtering = TextFiltering()
+        self.second_pass_thread = SecondPassThread()
+
+    def __del__(self):
+        self.second_pass_thread.stop()

    def _post_process(self, text, strip_spaces):
        is_cj_text = self.filtering.cj_regex.search(''.join(text))
@@ -1195,6 +1251,7 @@ class OutputResult:
        two_pass_processing_active = False

        if filter_text and engine_index_2 != -1 and engine_index_2 != engine_index:
+            self.second_pass_thread.start()
            engine_instance_2 = engine_instances[engine_index_2]
            start_time = time.time()
            res2, result_data_2 = engine_instance_2(img_or_path)
@@ -1212,12 +1269,22 @@ class OutputResult:
                    if output_format != 'json':
                        if changed_regions_image:
                            img_or_path = changed_regions_image
-                else:
-                    return

-        start_time = time.time()
-        res, result_data = engine_instance(img_or_path)
-        end_time = time.time()
+                    if engine_instance.threading_support:
+                        self.second_pass_thread.submit_task(img_or_path, engine_instance)
+        else:
+            self.second_pass_thread.stop()
+
+        second_pass_result = self.second_pass_thread.get_result()
+        if second_pass_result:
+            res, result_data, processing_time = second_pass_result
+            two_pass_processing_active = True
+        elif two_pass_processing_active and engine_instance.threading_support:
+            return
+        else:
+            start_time = time.time()
+            res, result_data = engine_instance(img_or_path)
+            end_time = time.time()

        if not res:
            logger.opt(ansi=True).warning(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {result_data}')
--- a/owocr/screen_coordinate_picker.py
+++ b/owocr/screen_coordinate_picker.py
@@ -1,6 +1,12 @@
-from multiprocessing import Process, Manager
+import multiprocessing
+import queue
 import mss
 from PIL import Image
+import sys
+try:
+    from AppKit import NSApplication, NSApplicationActivationPolicyAccessory
+except ImportError:
+    pass

 try:
    from PIL import ImageTk
@@ -11,105 +17,20 @@ except:


 class ScreenSelector:
-    def __init__(self, result, input_image=None):
+    def __init__(self, result_queue, command_queue):
        self.sct = mss.mss()
        self.monitors = self.sct.monitors[1:]
        self.root = None
-        self.result = result
-        self.input_image = input_image
+        self.result_queue = result_queue
+        self.command_queue = command_queue
+        self.mac_init_done = False

    def on_select(self, monitor, coordinates):
-        self.result['monitor'] = monitor
-        self.result['coordinates'] = coordinates
-        self.root.destroy()
+        self.result_queue.put({'monitor': monitor, 'coordinates': coordinates})
+        if self.root:
+            self.root.destroy()

-    def create_window_from_image(self, img):
-        original_width, original_height = img.size
-        display_monitor = None
-        
-        for monitor in self.monitors:
-            if (monitor['width'] >= original_width and 
-                monitor['height'] >= original_height):
-                display_monitor = monitor
-                break
-
-        if not display_monitor:
-            display_monitor = self.monitors[0]
-
-        window_width = min(original_width, display_monitor['width'])
-        window_height = min(original_height, display_monitor['height'])
-        left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2
-        top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2
-
-        window = tk.Toplevel(self.root)
-        window.geometry(f"{window_width}x{window_height}+{left}+{top}")
-        window.overrideredirect(1)
-        window.attributes('-topmost', 1)
-
-        # Resize image if it's larger than the window
-        if img.width > window_width or img.height > window_height:
-            img = img.resize((window_width, window_height), Image.Resampling.LANCZOS)
-            scale_x = original_width / window_width
-            scale_y = original_height / window_height
-        else:
-            scale_x = 1
-            scale_y = 1
-
-        img_tk = ImageTk.PhotoImage(img)
-
-        canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
-        canvas.pack(fill=tk.BOTH, expand=True)
-        canvas.image = img_tk
-        canvas.create_image(0, 0, image=img_tk, anchor=tk.NW)
-
-        start_x, start_y, rect = None, None, None
-
-        def on_click(event):
-            nonlocal start_x, start_y, rect
-            start_x, start_y = event.x, event.y
-            rect = canvas.create_rectangle(start_x, start_y, start_x, start_y, outline='red')
-
-        def on_drag(event):
-            nonlocal rect, start_x, start_y
-            if rect:
-                canvas.coords(rect, start_x, start_y, event.x, event.y)
-
-        def on_release(event):
-            nonlocal start_x, start_y, scale_x, scale_y
-            end_x, end_y = event.x, event.y
-            
-            x1 = min(start_x, end_x) 
-            y1 = min(start_y, end_y) 
-            x2 = max(start_x, end_x) 
-            y2 = max(start_y, end_y)
-
-            x1 = int(x1 * scale_x)
-            y1 = int(y1 * scale_y)
-            x2 = int(x2 * scale_x)
-            y2 = int(y2 * scale_y)
-            
-            # Return None for monitor when using input image
-            self.on_select(None, (x1, y1, x2 - x1, y2 - y1))
-
-        canvas.bind('<ButtonPress-1>', on_click)
-        canvas.bind('<B1-Motion>', on_drag)
-        canvas.bind('<ButtonRelease-1>', on_release)
-
-    def create_window(self, monitor):
-        screenshot = self.sct.grab(monitor)
-        img = Image.frombytes('RGB', screenshot.size, screenshot.rgb)
-
-        if img.width != monitor['width']:
-            img = img.resize((monitor['width'], monitor['height']), Image.Resampling.LANCZOS)
-
-        window = tk.Toplevel(self.root)
-        window.geometry(f"{monitor['width']}x{monitor['height']}+{monitor['left']}+{monitor['top']}")
-        window.overrideredirect(1)
-        window.attributes('-topmost', 1)
-
-        img_tk = ImageTk.PhotoImage(img)
-
-        canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
+    def _setup_selection_canvas(self, canvas, img_tk, scale_x=1, scale_y=1, monitor=None):
        canvas.pack(fill=tk.BOTH, expand=True)
        canvas.image = img_tk
        canvas.create_image(0, 0, image=img_tk, anchor=tk.NW)
@@ -133,7 +54,12 @@ class ScreenSelector:
            x1 = min(start_x, end_x) 
            y1 = min(start_y, end_y) 
            x2 = max(start_x, end_x) 
-            y2 = max(start_y, end_y) 
+            y2 = max(start_y, end_y)
+
+            x1 = int(x1 * scale_x)
+            y1 = int(y1 * scale_y)
+            x2 = int(x2 * scale_x)
+            y2 = int(y2 * scale_y)
            
            self.on_select(monitor, (x1, y1, x2 - x1, y2 - y1))

@@ -141,37 +67,127 @@ class ScreenSelector:
        canvas.bind('<B1-Motion>', on_drag)
        canvas.bind('<ButtonRelease-1>', on_release)

-    def start(self):
-        self.root = tk.Tk()
-        self.root.withdraw()
+    def _create_selection_window(self, img, geometry, scale_x=1, scale_y=1, monitor=None):
+        window = tk.Toplevel(self.root)
+        window.geometry(geometry)
+        window.overrideredirect(1)
+        window.attributes('-topmost', 1)

-        if self.input_image:
-            self.create_window_from_image(self.input_image)
+        img_tk = ImageTk.PhotoImage(img)
+        canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
+        
+        self._setup_selection_canvas(canvas, img_tk, scale_x, scale_y, monitor)
+
+    def create_window_from_image(self, img):
+        original_width, original_height = img.size
+        display_monitor = None
+        
+        for monitor in self.monitors:
+            if (monitor['width'] >= original_width and 
+                monitor['height'] >= original_height):
+                display_monitor = monitor
+                break
+
+        if not display_monitor:
+            display_monitor = self.monitors[0]
+
+        window_width = min(original_width, display_monitor['width'])
+        window_height = min(original_height, display_monitor['height'])
+        left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2
+        top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2
+
+        geometry = f"{window_width}x{window_height}+{left}+{top}"
+
+        if img.width > window_width or img.height > window_height:
+            img = img.resize((window_width, window_height), Image.Resampling.LANCZOS)
+            scale_x = original_width / window_width
+            scale_y = original_height / window_height
        else:
-            for monitor in self.monitors:
-                self.create_window(monitor)
+            scale_x = 1
+            scale_y = 1

-        self.root.mainloop()
-        self.root.update()
+        self._create_selection_window(img, geometry, scale_x, scale_y, None)
+
+    def create_window(self, monitor):
+        screenshot = self.sct.grab(monitor)
+        img = Image.frombytes('RGB', screenshot.size, screenshot.rgb)
+        original_width, original_height = img.size
+
+        geometry = f"{monitor['width']}x{monitor['height']}+{monitor['left']}+{monitor['top']}"
+
+        if img.width != monitor['width']:
+            img = img.resize((monitor['width'], monitor['height']), Image.Resampling.LANCZOS)
+            scale_x = original_width / monitor['width']
+            scale_y = original_height / monitor['height']
+        else:
+            scale_x = 1
+            scale_y = 1
+
+        self._create_selection_window(img, geometry, scale_x, scale_y, monitor)
+
+    def start(self):
+        while True:
+            try:
+                image = self.command_queue.get(timeout=0.1)
+            except queue.Empty:
+                continue
+
+            if image == False:
+                break
+            if image == True:
+                self.on_select(None, None)
+                continue
+
+            self.root = tk.Tk()
+
+            if not self.mac_init_done and sys.platform == 'darwin':
+                app = NSApplication.sharedApplication()
+                app.setActivationPolicy_(NSApplicationActivationPolicyAccessory)
+                self.mac_init_done = True
+
+            self.root.withdraw()
+
+            if image:
+                self.create_window_from_image(image)
+            else:
+                for monitor in self.monitors:
+                    self.create_window(monitor)
+
+            self.root.mainloop()
+            self.root.update()
+            self.root = None


-def run_screen_selector(result, input_image=None):
-    selector = ScreenSelector(result, input_image)
+def run_screen_selector(result_queue, command_queue):
+    selector = ScreenSelector(result_queue, command_queue)
    selector.start()

+selector_process = None
+result_queue = None
+command_queue = None
+
+def get_screen_selection(pil_image, permanent_process):
+    global selector_process, result_queue, command_queue

-def get_screen_selection(pil_image = None):
    if not selector_available:
        raise ValueError('tkinter or PIL with tkinter support are not installed, unable to open picker')

-    with Manager() as manager:
-        res = manager.dict()
-        process = Process(target=run_screen_selector, args=(res, pil_image))
-        
-        process.start()    
-        process.join()
+    if selector_process is None or not selector_process.is_alive():
+        result_queue = multiprocessing.Queue()
+        command_queue = multiprocessing.Queue()
+        selector_process = multiprocessing.Process(target=run_screen_selector, args=(result_queue, command_queue))
+        selector_process.daemon = True
+        selector_process.start()

-        if 'monitor' in res and 'coordinates' in res:
-            return res.copy()
-        else:
-            return False
+    command_queue.put(pil_image)
+
+    result = False
+    while (not result) and selector_process.is_alive():
+        try:
+            result = result_queue.get(timeout=0.1)  # short 0.1 s poll so the loop can re-check that the picker process is still alive
+        except:
+            continue
+    if not permanent_process:
+        command_queue.put(False)
+        selector_process.join()
+    return result