Make second pass multithreaded and refactor coordinate picker to be permanent if needed

2025-10-12 22:26:45 +02:00
parent 14066bdc95
commit 68626c44a2
3 changed files with 231 additions and 135 deletions
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -200,6 +200,7 @@ class MangaOcr:
    local = True
    manual_language = False
    coordinate_support = False
    threading_support = True
    def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}):
        if 'manga_ocr' not in sys.modules:
@@ -233,6 +234,7 @@ class GoogleVision:
    local = False
    manual_language = False
    coordinate_support = False
    threading_support = True
    def __init__(self):
        if 'google.cloud' not in sys.modules:
@@ -280,6 +282,7 @@ class GoogleLens:
    local = False
    manual_language = False
    coordinate_support = True
    threading_support = True
    def __init__(self):
        if 'betterproto' not in sys.modules:
@@ -427,6 +430,7 @@ class GoogleLensWeb:
    local = False
    manual_language = False
    coordinate_support = False
    threading_support = True
    def __init__(self):
        if 'pyjson5' not in sys.modules:
@@ -524,6 +528,7 @@ class Bing:
    local = False
    manual_language = False
    coordinate_support = True
    threading_support = True
    def __init__(self):
        self.requests_session = requests.Session()
@@ -704,6 +709,7 @@ class AppleVision:
    local = True
    manual_language = True
    coordinate_support = False
    threading_support = True
    def __init__(self, language='ja'):
        if sys.platform != 'darwin':
@@ -756,6 +762,7 @@ class AppleLiveText:
    local = True
    manual_language = True
    coordinate_support = True
    threading_support = False
    def __init__(self, language='ja'):
        if sys.platform != 'darwin':
@@ -897,6 +904,7 @@ class WinRTOCR:
    local = True
    manual_language = True
    coordinate_support = False
    threading_support = True
    def __init__(self, config={}, language='ja'):
        if sys.platform == 'win32':
@@ -955,6 +963,7 @@ class OneOCR:
    local = True
    manual_language = False
    coordinate_support = True
    threading_support = True
    def __init__(self, config={}):
        if sys.platform == 'win32':
@@ -1079,6 +1088,7 @@ class AzureImageAnalysis:
    local = False
    manual_language = False
    coordinate_support = False
    threading_support = True
    def __init__(self, config={}):
        if 'azure.ai.vision.imageanalysis' not in sys.modules:
@@ -1135,6 +1145,7 @@ class EasyOCR:
    local = True
    manual_language = True
    coordinate_support = False
    threading_support = True
    def __init__(self, config={'gpu': True}, language='ja'):
        if 'easyocr' not in sys.modules:
@@ -1173,6 +1184,7 @@ class RapidOCR:
    local = True
    manual_language = True
    coordinate_support = False
    threading_support = True
    def __init__(self, config={'high_accuracy_detection': False, 'high_accuracy_recognition': True}, language='ja'):
        if 'rapidocr' not in sys.modules:
@@ -1238,6 +1250,7 @@ class OCRSpace:
    local = False
    manual_language = True
    coordinate_support = False
    threading_support = True
    def __init__(self, config={}, language='ja'):
        try:
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -783,6 +783,7 @@ class ScreenshotThread(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)
        screen_capture_area = config.get_general('screen_capture_area')
        self.coordinate_selector_combo_enabled = config.get_general('coordinate_selector_combo') != ''
        self.is_combo_screenshot = False
        self.macos_window_tracker_instance = None
        self.windows_window_tracker_instance = None
@@ -801,6 +802,9 @@ class ScreenshotThread(threading.Thread):
        else:
            self.screencapture_mode = 2
        if self.coordinate_selector_combo_enabled:
            self.launch_coordinate_picker(True, False)
        if self.screencapture_mode != 2:
            self.sct = mss.mss()
@@ -815,7 +819,7 @@ class ScreenshotThread(threading.Thread):
            elif self.screencapture_mode == 3:
                coord_left, coord_top, coord_width, coord_height = [int(c.strip()) for c in screen_capture_area.split(',')]
            else:
-                self.launch_coordinate_picker(True)
+                self.launch_coordinate_picker(False, True)
            if self.screencapture_mode != 0:
                self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
@@ -881,7 +885,7 @@ class ScreenshotThread(threading.Thread):
                    logger.opt(ansi=True).info(f'Selected window coordinates: {x},{y},{x2},{y2}')
                    self.window_area_coordinates = (img.size, (x, y, x2, y2))
                elif screen_capture_window_area == '':
-                    self.launch_coordinate_picker(True)
+                    self.launch_coordinate_picker(False, False)
                else:
                    raise ValueError('"screen_capture_window_area" must be empty, "window" for the whole window, or a valid set of coordinates')
@@ -1066,10 +1070,14 @@ class ScreenshotThread(threading.Thread):
        else:
            periodic_screenshot_queue.put(result)
-    def launch_coordinate_picker(self, on_init):
+    def launch_coordinate_picker(self, init, must_return):
        if init:
            logger.opt(ansi=True).info('Preloading screen coordinate picker')
            get_screen_selection(True, True)
            return
        if self.screencapture_mode != 2:
            logger.opt(ansi=True).info('Launching screen coordinate picker')
-            screen_selection = get_screen_selection()
+            screen_selection = get_screen_selection(None, self.coordinate_selector_combo_enabled)
            if not screen_selection:
                if on_init:
                    raise ValueError('Picker window was closed or an error occurred')
@@ -1093,7 +1101,7 @@ class ScreenshotThread(threading.Thread):
            self.window_area_coordinates = None
            img = self.take_screenshot()
            logger.opt(ansi=True).info('Launching window coordinate picker')
-            window_selection = get_screen_selection(img)
+            window_selection = get_screen_selection(img, self.coordinate_selector_combo_enabled)
            if not window_selection:
                logger.opt(ansi=True).warning('Picker window was closed or an error occurred, selecting whole window')
            else:
@@ -1112,7 +1120,7 @@ class ScreenshotThread(threading.Thread):
        while not terminated:
            if not screenshot_event.wait(timeout=0.1):
                if coordinate_selector_event.is_set():
-                    self.launch_coordinate_picker(False)
+                    self.launch_coordinate_picker(False, False)
                    coordinate_selector_event.clear()
                continue
@@ -1130,33 +1138,77 @@ class ScreenshotThread(threading.Thread):
            self.windows_window_tracker_instance.join()
 class SecondPassThread:
    """Background worker that runs OCR engine calls off the calling thread.

    Tasks are ``(img, engine_instance)`` tuples placed on ``input_queue``;
    each finished task yields a ``(res, result_data, elapsed_seconds)`` tuple
    on ``output_queue``.
    """
    def __init__(self):
        self.running = False
        self.ocr_thread = None
        self.input_queue = queue.Queue()
        self.output_queue = queue.Queue()
    def __del__(self):
        self.stop()
    def start(self):
        """Spawn the daemon worker thread unless one is already alive."""
        worker = self.ocr_thread
        if worker is not None and worker.is_alive():
            return
        self.running = True
        self.ocr_thread = threading.Thread(target=self._process_ocr, daemon=True)
        self.ocr_thread.start()
    def stop(self):
        """Clear the running flag and wait for a live worker thread to exit."""
        self.running = False
        worker = self.ocr_thread
        if worker and worker.is_alive():
            worker.join()
    def _process_ocr(self):
        """Worker loop: runs until ``running`` is cleared or ``terminated`` is set."""
        while self.running and not terminated:
            try:
                # Short timeout so the loop condition is re-checked regularly.
                image, engine = self.input_queue.get(timeout=0.1)
                started_at = time.time()
                res, result_data = engine(image)
                finished_at = time.time()
                self.output_queue.put((res, result_data, finished_at - started_at))
            except queue.Empty:
                continue
    def submit_task(self, img, engine_instance):
        """Queue an image together with the engine that should process it."""
        task = (img, engine_instance)
        self.input_queue.put(task)
    def get_result(self):
        """Return the next finished result without blocking, or None if there is none."""
        try:
            outcome = self.output_queue.get_nowait()
        except queue.Empty:
            outcome = None
        return outcome
 class AutopauseTimer:
    def __init__(self, timeout):
        self.stop_event = threading.Event()
        self.timeout = timeout
        self.timer_thread = None
        self.running = False
    def __del__(self):
        self.stop()
    def start(self):
        self.stop()
-        self.stop_event.clear()
+        self.running = True
        self.timer_thread = threading.Thread(target=self._countdown)
        self.timer_thread.start()
    def stop(self):
-        if not self.stop_event.is_set() and self.timer_thread and self.timer_thread.is_alive():
+        if self.running and self.timer_thread and self.timer_thread.is_alive():
-            self.stop_event.set()
+            self.running = False
            self.timer_thread.join()
    def _countdown(self):
        seconds = self.timeout
-        while seconds > 0 and not self.stop_event.is_set() and not terminated:
+        while seconds > 0 and self.running and not terminated:
            time.sleep(1)
            seconds -= 1
-        if not self.stop_event.is_set():
+        if self.running:
-            self.stop_event.set()
+            self.running = False
            if not (paused or terminated):
                pause_handler(True)
@@ -1164,6 +1216,10 @@ class AutopauseTimer:
 class OutputResult:
    def __init__(self):
        self.filtering = TextFiltering()
        self.second_pass_thread = SecondPassThread()
    def __del__(self):
        self.second_pass_thread.stop()
    def _post_process(self, text, strip_spaces):
        is_cj_text = self.filtering.cj_regex.search(''.join(text))
@@ -1195,6 +1251,7 @@ class OutputResult:
        two_pass_processing_active = False
        if filter_text and engine_index_2 != -1 and engine_index_2 != engine_index:
            self.second_pass_thread.start()
            engine_instance_2 = engine_instances[engine_index_2]
            start_time = time.time()
            res2, result_data_2 = engine_instance_2(img_or_path)
@@ -1212,9 +1269,19 @@ class OutputResult:
                    if output_format != 'json':
                        if changed_regions_image:
                            img_or_path = changed_regions_image
                else:
                    return
                    if engine_instance.threading_support:
                        self.second_pass_thread.submit_task(img_or_path, engine_instance)
        else:
            self.second_pass_thread.stop()
        second_pass_result = self.second_pass_thread.get_result()
        if second_pass_result:
            res, result_data, processing_time = second_pass_result
            two_pass_processing_active = True
        elif two_pass_processing_active and engine_instance.threading_support:
            return
        else:
            start_time = time.time()
            res, result_data = engine_instance(img_or_path)
            end_time = time.time()
--- a/owocr/screen_coordinate_picker.py
+++ b/owocr/screen_coordinate_picker.py
@@ -1,6 +1,12 @@
-from multiprocessing import Process, Manager
+import multiprocessing
 import queue
 import mss
 from PIL import Image
 import sys
 try:
    from AppKit import NSApplication, NSApplicationActivationPolicyAccessory
 except ImportError:
    pass
 try:
    from PIL import ImageTk
@@ -11,105 +17,20 @@ except:
 class ScreenSelector:
-    def __init__(self, result, input_image=None):
+    def __init__(self, result_queue, command_queue):
        self.sct = mss.mss()
        self.monitors = self.sct.monitors[1:]
        self.root = None
-        self.result = result
+        self.result_queue = result_queue
-        self.input_image = input_image
+        self.command_queue = command_queue
        self.mac_init_done = False
    def on_select(self, monitor, coordinates):
-        self.result['monitor'] = monitor
+        self.result_queue.put({'monitor': monitor, 'coordinates': coordinates})
-        self.result['coordinates'] = coordinates
+        if self.root:
            self.root.destroy()
-    def create_window_from_image(self, img):
+    def _setup_selection_canvas(self, canvas, img_tk, scale_x=1, scale_y=1, monitor=None):
        original_width, original_height = img.size
        display_monitor = None
        for monitor in self.monitors:
            if (monitor['width'] >= original_width and 
                monitor['height'] >= original_height):
                display_monitor = monitor
                break
        if not display_monitor:
            display_monitor = self.monitors[0]
        window_width = min(original_width, display_monitor['width'])
        window_height = min(original_height, display_monitor['height'])
        left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2
        top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2
        window = tk.Toplevel(self.root)
        window.geometry(f"{window_width}x{window_height}+{left}+{top}")
        window.overrideredirect(1)
        window.attributes('-topmost', 1)
        # Resize image if it's larger than the window
        if img.width > window_width or img.height > window_height:
            img = img.resize((window_width, window_height), Image.Resampling.LANCZOS)
            scale_x = original_width / window_width
            scale_y = original_height / window_height
        else:
            scale_x = 1
            scale_y = 1
        img_tk = ImageTk.PhotoImage(img)
        canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
        canvas.pack(fill=tk.BOTH, expand=True)
        canvas.image = img_tk
        canvas.create_image(0, 0, image=img_tk, anchor=tk.NW)
        start_x, start_y, rect = None, None, None
        def on_click(event):
            nonlocal start_x, start_y, rect
            start_x, start_y = event.x, event.y
            rect = canvas.create_rectangle(start_x, start_y, start_x, start_y, outline='red')
        def on_drag(event):
            nonlocal rect, start_x, start_y
            if rect:
                canvas.coords(rect, start_x, start_y, event.x, event.y)
        def on_release(event):
            nonlocal start_x, start_y, scale_x, scale_y
            end_x, end_y = event.x, event.y
            x1 = min(start_x, end_x) 
            y1 = min(start_y, end_y) 
            x2 = max(start_x, end_x) 
            y2 = max(start_y, end_y)
            x1 = int(x1 * scale_x)
            y1 = int(y1 * scale_y)
            x2 = int(x2 * scale_x)
            y2 = int(y2 * scale_y)
            # Return None for monitor when using input image
            self.on_select(None, (x1, y1, x2 - x1, y2 - y1))
        canvas.bind('<ButtonPress-1>', on_click)
        canvas.bind('<B1-Motion>', on_drag)
        canvas.bind('<ButtonRelease-1>', on_release)
    def create_window(self, monitor):
        screenshot = self.sct.grab(monitor)
        img = Image.frombytes('RGB', screenshot.size, screenshot.rgb)
        if img.width != monitor['width']:
            img = img.resize((monitor['width'], monitor['height']), Image.Resampling.LANCZOS)
        window = tk.Toplevel(self.root)
        window.geometry(f"{monitor['width']}x{monitor['height']}+{monitor['left']}+{monitor['top']}")
        window.overrideredirect(1)
        window.attributes('-topmost', 1)
        img_tk = ImageTk.PhotoImage(img)
        canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
        canvas.pack(fill=tk.BOTH, expand=True)
        canvas.image = img_tk
        canvas.create_image(0, 0, image=img_tk, anchor=tk.NW)
@@ -135,43 +56,138 @@ class ScreenSelector:
            x2 = max(start_x, end_x) 
            y2 = max(start_y, end_y)
            x1 = int(x1 * scale_x)
            y1 = int(y1 * scale_y)
            x2 = int(x2 * scale_x)
            y2 = int(y2 * scale_y)
            self.on_select(monitor, (x1, y1, x2 - x1, y2 - y1))
        canvas.bind('<ButtonPress-1>', on_click)
        canvas.bind('<B1-Motion>', on_drag)
        canvas.bind('<ButtonRelease-1>', on_release)
    def _create_selection_window(self, img, geometry, scale_x=1, scale_y=1, monitor=None):
        """Open a borderless, always-on-top window for ``img`` and wire up selection.

        ``geometry`` is a Tk geometry string. ``scale_x``, ``scale_y`` and
        ``monitor`` are forwarded unchanged to ``_setup_selection_canvas``.
        """
        overlay = tk.Toplevel(self.root)
        overlay.geometry(geometry)
        # No window decorations, and keep the overlay above other windows.
        overlay.overrideredirect(1)
        overlay.attributes('-topmost', 1)

        photo = ImageTk.PhotoImage(img)
        selection_canvas = tk.Canvas(overlay, cursor='cross', highlightthickness=0)
        self._setup_selection_canvas(selection_canvas, photo, scale_x, scale_y, monitor)
    def create_window_from_image(self, img):
        """Show ``img`` in a selection window centred on a suitable monitor.

        The first monitor at least as large as the image is used, falling back
        to the first monitor. An image larger than the window is resized to
        the window size, and the original/displayed size ratios are passed on
        as scale factors. The monitor argument passed on is always None.
        """
        original_width, original_height = img.size

        # First monitor that can hold the image at full size, if any.
        display_monitor = next(
            (candidate for candidate in self.monitors
             if candidate['width'] >= original_width
             and candidate['height'] >= original_height),
            None,
        )
        if not display_monitor:
            display_monitor = self.monitors[0]

        window_width = min(original_width, display_monitor['width'])
        window_height = min(original_height, display_monitor['height'])
        # Centre the window on the chosen monitor.
        left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2
        top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2
        geometry = f"{window_width}x{window_height}+{left}+{top}"

        scale_x = 1
        scale_y = 1
        if img.width > window_width or img.height > window_height:
            img = img.resize((window_width, window_height), Image.Resampling.LANCZOS)
            scale_x = original_width / window_width
            scale_y = original_height / window_height

        self._create_selection_window(img, geometry, scale_x, scale_y, None)
    def create_window(self, monitor):
        """Grab a screenshot of ``monitor`` and show it in a window covering that monitor.

        If the captured image width differs from the monitor's reported width,
        the image is resized to the monitor size and the captured/monitor size
        ratios are passed on as scale factors; otherwise both factors are 1.
        """
        capture = self.sct.grab(monitor)
        img = Image.frombytes('RGB', capture.size, capture.rgb)
        captured_width, captured_height = img.size
        geometry = f"{monitor['width']}x{monitor['height']}+{monitor['left']}+{monitor['top']}"

        scale_x = 1
        scale_y = 1
        if img.width != monitor['width']:
            img = img.resize((monitor['width'], monitor['height']), Image.Resampling.LANCZOS)
            scale_x = captured_width / monitor['width']
            scale_y = captured_height / monitor['height']

        self._create_selection_window(img, geometry, scale_x, scale_y, monitor)
    def start(self):
        while True:
            try:
                image = self.command_queue.get(timeout=0.1)
            except queue.Empty:
                continue
            if image == False:
                break
            if image == True:
                self.on_select(None, None)
                continue
            self.root = tk.Tk()
            if not self.mac_init_done and sys.platform == 'darwin':
                app = NSApplication.sharedApplication()
                app.setActivationPolicy_(NSApplicationActivationPolicyAccessory)
                self.mac_init_done = True
            self.root.withdraw()
-        if self.input_image:
+            if image:
-            self.create_window_from_image(self.input_image)
+                self.create_window_from_image(image)
            else:
                for monitor in self.monitors:
                    self.create_window(monitor)
            self.root.mainloop()
            self.root.update()
            self.root = None
-def run_screen_selector(result, input_image=None):
+def run_screen_selector(result_queue, command_queue):
-    selector = ScreenSelector(result, input_image)
+    selector = ScreenSelector(result_queue, command_queue)
    selector.start()
 selector_process = None
 result_queue = None
 command_queue = None
 def get_screen_selection(pil_image, permanent_process):
    global selector_process, result_queue, command_queue
 def get_screen_selection(pil_image = None):
    if not selector_available:
        raise ValueError('tkinter or PIL with tkinter support are not installed, unable to open picker')
-    with Manager() as manager:
+    if selector_process is None or not selector_process.is_alive():
-        res = manager.dict()
+        result_queue = multiprocessing.Queue()
-        process = Process(target=run_screen_selector, args=(res, pil_image))
+        command_queue = multiprocessing.Queue()
        selector_process = multiprocessing.Process(target=run_screen_selector, args=(result_queue, command_queue))
        selector_process.daemon = True
        selector_process.start()
-        process.start()    
+    command_queue.put(pil_image)
        process.join()
-        if 'monitor' in res and 'coordinates' in res:
+    result = False
-            return res.copy()
+    while (not result) and selector_process.is_alive():
-        else:
+        try:
-            return False
+            result = result_queue.get(timeout=0.1)  # 60 second timeout
        except:
            continue
    if not permanent_process:
        command_queue.put(False)
        selector_process.join()
    return result