Big refactoring, allow second image source

2025-05-04 08:37:54 +02:00
parent 7a307f4cb9
commit e48f388755
3 changed files with 252 additions and 259 deletions
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -12,6 +12,8 @@ parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
 parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
                    help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
 # Help text is specific to the secondary source: it is optional and cannot be
 # a directory when read_from already is one (run() rejects that combination).
 parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.SUPPRESS,
                    help='Optional secondary source to read input images from, used in addition to read_from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory. read_from and read_from_secondary cannot both be directory paths.')
 parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
                    help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
 parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
@@ -47,6 +49,7 @@ class Config:
    __engine_config = {}
    __default_config = {
        'read_from': 'clipboard',
        'read_from_secondary': None,
        'write_to': 'clipboard',
        'engine': '',
        'pause_at_startup': False,
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -96,6 +96,22 @@ def post_process(text):
    return text
 def input_to_pil_image(img):
    """Turn a supported image input into a PIL image.

    Args:
        img: A ``PIL.Image.Image`` (returned as-is), a ``bytes``/``bytearray``
            object holding encoded image data, or a ``pathlib.Path`` to an
            image file.

    Returns:
        The PIL image, or ``None`` when the bytes or the file cannot be
        opened as an image.

    Raises:
        ValueError: If ``img`` is none of the supported types.
    """
    if isinstance(img, Image.Image):
        return img

    is_path = isinstance(img, Path)
    if isinstance(img, (bytes, bytearray)):
        source = io.BytesIO(img)
    elif is_path:
        source = img
    else:
        raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')

    # Both byte payloads and files can hold data that is not a valid image;
    # report that uniformly as None so callers can answer "invalid image"
    # instead of crashing on an exception.
    try:
        pil_image = Image.open(source)
        if is_path:
            # Read the file contents now so read errors are caught here.
            pil_image.load()
    except (UnidentifiedImageError, OSError):
        return None
    return pil_image
 def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
    if img_format == 'png' and optimized_png_encode and not optimize:
        raw_data = img.convert('RGBA').tobytes()
@@ -157,15 +173,14 @@ class MangaOcr:
            self.available = True
            logger.info('Manga OCR ready')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        x = (True, self.model(img))
        img.close()
        return x
 class GoogleVision:
@@ -188,13 +203,10 @@ class GoogleVision:
            except:
                logger.warning('Error parsing Google credentials, Google Vision will not work!')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        image_bytes = self._preprocess(img)
        image = vision.Image(content=image_bytes)
@@ -207,6 +219,8 @@ class GoogleVision:
        texts = response.text_annotations
        res = texts[0].description if len(texts) > 0 else ''
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):
@@ -225,13 +239,10 @@ class GoogleLens:
            self.available = True
            logger.info('Google Lens ready')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        request = LensOverlayServerRequest()
@@ -298,6 +309,8 @@ class GoogleLens:
                res += '\n'
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):
@@ -305,9 +318,10 @@ class GoogleLens:
            aspect_ratio = img.width / img.height
            new_w = int(sqrt(3000000 * aspect_ratio))
            new_h = int(new_w / aspect_ratio)
-            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
            img.close()
-        return (pil_image_to_bytes(img), img.width, img.height)
+        return (pil_image_to_bytes(img_resized), img_resized.width, img_resized.height)
 class GoogleLensWeb:
    name = 'glensweb'
@@ -323,13 +337,10 @@ class GoogleLensWeb:
            self.available = True
            logger.info('Google Lens (web) ready')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        url = 'https://lens.google.com/v3/upload'
        files = {'encoded_image': ('image.png', self._preprocess(img), 'image/png')}
@@ -393,6 +404,8 @@ class GoogleLensWeb:
            res += '\n'
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):
@@ -400,9 +413,10 @@ class GoogleLensWeb:
            aspect_ratio = img.width / img.height
            new_w = int(sqrt(3000000 * aspect_ratio))
            new_h = int(new_w / aspect_ratio)
-            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
            img.close()
-        return pil_image_to_bytes(img)
+        return pil_image_to_bytes(img_resized)
 class Bing:
    name = 'bing'
@@ -415,13 +429,10 @@ class Bing:
        self.available = True
        logger.info('Bing ready')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        img_bytes = self._preprocess(img)
        if not img_bytes:
@@ -515,6 +526,8 @@ class Bing:
                        res += line['text'] + '\n'
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):
@@ -526,9 +539,10 @@ class Bing:
            resize_factor = max(max_pixel_size / img.width, max_pixel_size / img.height)
            new_w = int(img.width * resize_factor)
            new_h = int(img.height * resize_factor)
-            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
            img.close()
-        img_bytes, _ = limit_image_size(img, max_byte_size)
+        img_bytes, _ = limit_image_size(img_resized, max_byte_size)
        if img_bytes:
            res = base64.b64encode(img_bytes).decode('utf-8')
@@ -550,13 +564,10 @@ class AppleVision:
            self.available = True
            logger.info('Apple Vision ready')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        with objc.autorelease_pool():
            req = Vision.VNRecognizeTextRequest.alloc().init()
@@ -579,6 +590,7 @@ class AppleVision:
            else:
                x = (False, 'Unknown error!')
            img.close()
            return x
    def _preprocess(self, img):
@@ -631,13 +643,10 @@ class AppleLiveText:
            self.available = True
            logger.info('Apple Live Text ready')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        with objc.autorelease_pool():
            analyzer = self.VKCImageAnalyzer.alloc().init()
@@ -691,13 +700,10 @@ class WinRTOCR:
            except:
                logger.warning('Error reading URL from config, WinRT OCR will not work!')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        if sys.platform == 'win32':
            res = winocr.recognize_pil_sync(img, lang='ja')['text']
@@ -716,6 +722,8 @@ class WinRTOCR:
            res = res.json()['text']
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):
@@ -749,13 +757,10 @@ class OneOCR:
            except:
                logger.warning('Error reading URL from config, OneOCR will not work!')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        if sys.platform == 'win32':
            try:
@@ -776,6 +781,8 @@ class OneOCR:
            res = res.json()['text']
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):
@@ -799,13 +806,10 @@ class AzureImageAnalysis:
            except:
                logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        try:
            read_result = self.client.analyze(image_data=self._preprocess(img), visual_features=[VisualFeatures.READ])
@@ -823,6 +827,8 @@ class AzureImageAnalysis:
            return (False, 'Unknown error!')
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):
@@ -830,9 +836,10 @@ class AzureImageAnalysis:
            resize_factor = max(50 / img.width, 50 / img.height)
            new_w = int(img.width * resize_factor)
            new_h = int(img.height * resize_factor)
-            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
            img.close()
-        return pil_image_to_bytes(img)
+        return pil_image_to_bytes(img_resized)
 class EasyOCR:
    name = 'easyocr'
@@ -850,13 +857,10 @@ class EasyOCR:
            self.available = True
            logger.info('EasyOCR ready')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        res = ''
        read_result = self.model.readtext(self._preprocess(img), detail=0)
@@ -864,6 +868,8 @@ class EasyOCR:
            res += text + '\n'
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):
@@ -897,13 +903,10 @@ class RapidOCR:
            self.available = True
            logger.info('RapidOCR ready')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        res = ''
        read_results, elapsed = self.model(self._preprocess(img))
@@ -912,6 +915,8 @@ class RapidOCR:
                res += read_result[1] + '\n'
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):
@@ -932,13 +937,10 @@ class OCRSpace:
        except:
            logger.warning('Error reading API key from config, OCRSpace will not work!')
-    def __call__(self, img_or_path):
+    def __call__(self, img):
-        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+        img = input_to_pil_image(img)
-            img = Image.open(img_or_path)
+        if not img:
-        elif isinstance(img_or_path, Image.Image):
+            return (False, 'Invalid image provided')
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
        img_bytes, img_extension = self._preprocess(img)
        if not img_bytes:
@@ -969,6 +971,8 @@ class OCRSpace:
        res = res['ParsedResults'][0]['ParsedText']
        x = (True, res)
        img.close()
        return x
    def _preprocess(self, img):       
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -93,7 +93,7 @@ class ClipboardThread(threading.Thread):
            1.0
        )
-        return new_image.TIFFRepresentation()
+        return bytes(new_image.TIFFRepresentation())
    def process_message(self, hwnd: int, msg: int, wparam: int, lparam: int):
        WM_CLIPBOARDUPDATE = 0x031D
@@ -114,7 +114,7 @@ class ClipboardThread(threading.Thread):
                        clipboard_text = win32clipboard.GetClipboardData(win32clipboard.CF_UNICODETEXT)
                    if self.ignore_flag or clipboard_text != '*ocr_ignore*':
                        img = win32clipboard.GetClipboardData(win32clipboard.CF_DIB)
-                        clipboard_queue.put(img)
+                        image_queue.put((img, False))
                win32clipboard.CloseClipboard()
            except pywintypes.error:
                pass
@@ -165,7 +165,7 @@ class ClipboardThread(threading.Thread):
                                        clipboard_text = pasteboard.stringForType_(NSPasteboardTypeString)
                                    if self.ignore_flag or clipboard_text != '*ocr_ignore*':
                                        img = self.normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF))
-                                        clipboard_queue.put(img)
+                                        image_queue.put((img, False))
                    else:
                        old_img = img
                        try:
@@ -176,7 +176,7 @@ class ClipboardThread(threading.Thread):
                            if ((not just_unpaused) and isinstance(img, Image.Image) and \
                                (self.ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and \
                                (not self.are_images_identical(img, old_img))):
-                                clipboard_queue.put(img)
+                                image_queue.put((img, False))
                    just_unpaused = False
@@ -184,6 +184,41 @@ class ClipboardThread(threading.Thread):
                    time.sleep(sleep_time)
 class DirectoryWatcher(threading.Thread):
    """Daemon thread that polls a directory and queues newly seen image files.

    Files already present when the thread starts are recorded but not queued.
    Afterwards, every file with an allowed extension whose (path, mtime) key
    has not been seen before is put on ``image_queue`` as ``(path, False)``.
    """

    def __init__(self, path):
        """Store the directory to watch and read the polling delay from config.

        Args:
            path: Directory to poll; ``run`` calls ``iterdir()`` on it.
        """
        super().__init__(daemon=True)
        self.path = path
        self.delay_secs = config.get_general('delay_secs')
        self.last_update = time.time()
        self.allowed_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')

    def get_path_key(self, path):
        """Return ``(path, mtime)`` so a rewritten file counts as a new entry."""
        return path, path.lstat().st_mtime

    def _iter_image_keys(self):
        """Yield ``(path, key)`` for each file with an allowed extension."""
        for path in self.path.iterdir():
            if path.suffix.lower() not in self.allowed_extensions:
                continue
            try:
                path_key = self.get_path_key(path)
            except OSError:
                # The file vanished between listing and stat; skip it rather
                # than let the exception end the watcher thread.
                continue
            yield path, path_key

    def run(self):
        """Poll the directory until the module-level ``terminated`` flag is set."""
        # Snapshot existing files so only later additions/changes are queued.
        old_paths = {path_key for _, path_key in self._iter_image_keys()}
        while not terminated:
            if paused:
                sleep_time = 0.5
            else:
                sleep_time = self.delay_secs
                for path, path_key in self._iter_image_keys():
                    if path_key not in old_paths:
                        old_paths.add(path_key)
                        # Re-check: pausing may have happened during the scan.
                        if not paused:
                            image_queue.put((path, False))
            if not terminated:
                time.sleep(sleep_time)
 class WebsocketServerThread(threading.Thread):
    def __init__(self, read):
        super().__init__(daemon=True)
@@ -206,7 +241,7 @@ class WebsocketServerThread(threading.Thread):
        try:
            async for message in websocket:
                if self.read and not paused:
-                    websocket_queue.put(message)
+                    image_queue.put((message, False))
                    try:
                        await websocket.send('True')
                    except websockets.exceptions.ConnectionClosedOK:
@@ -255,7 +290,7 @@ class RequestHandler(socketserver.BaseRequestHandler):
            pass
        if not paused:
-            unixsocket_queue.put(img)
+            image_queue.put((img, False))
            conn.sendall(b'True')
        else:
            conn.sendall(b'False')
@@ -266,7 +301,7 @@ class MacOSWindowTracker(threading.Thread):
        super().__init__(daemon=True)
        self.stop = False
        self.window_id = window_id
-        self.window_active = False
+        self.window_active = screencapture_window_active
    def run(self):
        found = True
@@ -302,8 +337,8 @@ class WindowsWindowTracker(threading.Thread):
        self.stop = False
        self.window_handle = window_handle
        self.only_active = only_active
-        self.window_active = False
+        self.window_active = screencapture_window_active
-        self.window_minimized = False
+        self.window_minimized = not screencapture_window_visible
    def run(self):
        found = True
@@ -463,14 +498,8 @@ class TextFiltering:
 class ScreenshotClass:
-    def __init__(self, screen_capture_on_combo):
+    def __init__(self):
        screen_capture_area = config.get_general('screen_capture_area')
        if type(screen_capture_area) == tuple:
            screen_capture_area = ','.join(map(str, screen_capture_area))
        global screencapture_window_active
        global screencapture_window_visible
        screencapture_window_active = True
        screencapture_window_visible = True
        self.macos_window_tracker = None
        self.windows_window_tracker = None
        if screen_capture_area == '':
@@ -519,7 +548,7 @@ class ScreenshotClass:
            self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
            logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
        else:
-            screen_capture_only_active_windows = (not screen_capture_on_combo) and config.get_general('screen_capture_only_active_windows')
+            screen_capture_only_active_windows = config.get_general('screen_capture_only_active_windows')
            area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name'
            if sys.platform == 'darwin':
                if int(platform.mac_ver()[0].split('.')[0]) < 14:
@@ -554,7 +583,6 @@ class ScreenshotClass:
                window_title = window_titles[window_index]
                if screen_capture_only_active_windows:
                    screencapture_window_active = False
                    self.macos_window_tracker = MacOSWindowTracker(self.window_id)
                    self.macos_window_tracker.start()
                logger.opt(ansi=True).info(f'Selected window: {window_title}')
@@ -566,8 +594,6 @@ class ScreenshotClass:
                ctypes.windll.shcore.SetProcessDpiAwareness(1)
                if screen_capture_only_active_windows:
                    screencapture_window_active = False
                self.windows_window_tracker = WindowsWindowTracker(self.window_handle, screen_capture_only_active_windows)
                self.windows_window_tracker.start()
                logger.opt(ansi=True).info(f'Selected window: {window_title}')
@@ -595,7 +621,7 @@ class ScreenshotClass:
                        except queue.Empty:
                            cg_image = None
                    if not cg_image:
-                        return None
+                        return 0
                    width = CGImageGetWidth(cg_image)
                    height = CGImageGetHeight(cg_image)
                    raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
@@ -620,7 +646,7 @@ class ScreenshotClass:
                    bmpinfo = save_bitmap.GetInfo()
                    bmpstr = save_bitmap.GetBitmapBits(True)
                except pywintypes.error:
-                    return None
+                    return 0
                img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
                try:
                    win32gui.DeleteObject(save_bitmap.GetHandle())
@@ -651,6 +677,9 @@ class AutopauseTimer:
        self.timeout = timeout
        self.timer_thread = None
    def __del__(self):
        # Finalizer: call stop() when this object is being destroyed.
        self.stop()
    def start(self):
        self.stop()
        self.stop_event.clear()
@@ -664,12 +693,12 @@ class AutopauseTimer:
    def _countdown(self):
        seconds = self.timeout
-        while seconds > 0 and not self.stop_event.is_set():
+        while seconds > 0 and not self.stop_event.is_set() and not terminated:
            time.sleep(1)
            seconds -= 1
        if not self.stop_event.is_set():
            self.stop_event.set()
-            if not paused:
+            if not (paused or terminated):
                pause_handler(True)
@@ -775,7 +804,7 @@ def on_window_minimized(minimized):
 def on_screenshot_combo():
    if not paused:
        img = take_screenshot()
-        screenshot_queue.put(img)
+        image_queue.put((img, True))
 def process_and_write_results(img_or_path, last_result, filtering):
@@ -783,9 +812,9 @@ def process_and_write_results(img_or_path, last_result, filtering):
        auto_pause_handler.stop()
    engine_instance = engine_instances[engine_index]
-    t0 = time.time()
+    start_time = time.time()
    res, text = engine_instance(img_or_path)
-    t1 = time.time()
+    end_time = time.time()
    orig_text = []
    engine_color = config.get_general('engine_color')
@@ -793,7 +822,7 @@ def process_and_write_results(img_or_path, last_result, filtering):
        if filtering:
            text, orig_text = filtering(text, last_result)
        text = post_process(text)
-        logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
+        logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
        if config.get_general('notifications'):
            notifier.send(title='owocr', message='Text recognized: ' + text)
@@ -809,15 +838,11 @@ def process_and_write_results(img_or_path, last_result, filtering):
        if auto_pause_handler and not paused:
            auto_pause_handler.start()
    else:
-        logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {t1 - t0:0.03f}s: {text}')
+        logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {text}')
    return orig_text
 def get_path_key(path):
    """Build a (path, modification time) pair identifying one version of a file."""
    modified_at = path.lstat().st_mtime
    return (path, modified_at)
 def run():
    logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
@@ -861,22 +886,33 @@ def run():
    global paused
    global notifier
    global auto_pause_handler
    global websocket_server_thread
    global image_queue
    non_path_inputs = ('screencapture', 'clipboard', 'websocket', 'unixsocket')
    read_from = config.get_general('read_from')
    read_from_secondary = config.get_general('read_from_secondary')
    read_from_path = None
    read_from_readable = []
    write_to = config.get_general('write_to')
    terminated = False
    paused = config.get_general('pause_at_startup')
    auto_pause = config.get_general('auto_pause')
    clipboard_thread = None
    websocket_server_thread = None
    directory_watcher_thread = None
    unix_socket_server = None
    key_combo_listener = None
    filtering = None
    auto_pause_handler = None
    engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
    engine_color = config.get_general('engine_color')
    combo_pause = config.get_general('combo_pause')
    combo_engine_switch = config.get_general('combo_engine_switch')
    screen_capture_on_combo = False
    notifier = DesktopNotifierSync()
    image_queue = queue.Queue()
    key_combos = {}
    if read_from != 'screencapture' and auto_pause != 0:
        auto_pause_handler = AutopauseTimer(auto_pause)
    if combo_pause != '':
        key_combos[combo_pause] = pause_handler
    if combo_engine_switch != '':
@@ -885,64 +921,51 @@ def run():
        else:
            raise ValueError('combo_pause must also be specified')
-    if read_from == 'websocket' or write_to == 'websocket':
+    if 'websocket' in (read_from, read_from_secondary) or write_to == 'websocket':
-        global websocket_server_thread
+        websocket_server_thread = WebsocketServerThread('websocket' in (read_from, read_from_secondary))
        websocket_server_thread = WebsocketServerThread(read_from == 'websocket')
        websocket_server_thread.start()
-
+    if 'screencapture' in (read_from, read_from_secondary):
-    if read_from == 'websocket':
+        global screencapture_window_active
-        global websocket_queue
+        global screencapture_window_visible
-        websocket_queue = queue.Queue()
+        global take_screenshot
-        read_from_readable = 'websocket'
+        screencapture_window_active = False
-    elif read_from == 'unixsocket':
+        screencapture_window_visible = True
        screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
        screen_capture_combo = config.get_general('screen_capture_combo')
        last_screenshot_time = 0
        last_result = ([], engine_index)
        if screen_capture_combo != '':
            screen_capture_on_combo = True
            key_combos[screen_capture_combo] = on_screenshot_combo
        take_screenshot = ScreenshotClass()
        filtering = TextFiltering()
        read_from_readable.append('screen capture')
    if 'websocket' in (read_from, read_from_secondary):
        read_from_readable.append('websocket')
    if 'unixsocket' in (read_from, read_from_secondary):
        if sys.platform == 'win32':
            raise ValueError('"unixsocket" is not currently supported on Windows')
        global unixsocket_queue
        unixsocket_queue = queue.Queue()
        socket_path = Path('/tmp/owocr.sock')
        if socket_path.exists():
            socket_path.unlink()
        unix_socket_server = socketserver.ThreadingUnixStreamServer(str(socket_path), RequestHandler)
        unix_socket_server_thread = threading.Thread(target=unix_socket_server.serve_forever, daemon=True)
        unix_socket_server_thread.start()
-        read_from_readable = 'unix socket'
+        read_from_readable.append('unix socket')
-    elif read_from == 'clipboard':
+    if 'clipboard' in (read_from, read_from_secondary):
        global clipboard_queue
        clipboard_queue = queue.Queue()
        clipboard_thread = ClipboardThread()
        clipboard_thread.start()
-        read_from_readable = 'clipboard'
+        read_from_readable.append('clipboard')
-    elif read_from == 'screencapture':
+    if any(i and i not in non_path_inputs for i in (read_from, read_from_secondary)):
-        screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
+        if all(i and i not in non_path_inputs for i in (read_from, read_from_secondary)):
-        screen_capture_combo = config.get_general('screen_capture_combo')
+            raise ValueError("read_from and read_from_secondary can't both be directory paths")
        last_result = ([], engine_index)
        if screen_capture_combo != '':
            screen_capture_on_combo = True
            global screenshot_queue
            screenshot_queue = queue.Queue()
            key_combos[screen_capture_combo] = on_screenshot_combo
        else:
            screen_capture_on_combo = False
        global take_screenshot
        take_screenshot = ScreenshotClass(screen_capture_on_combo)
        filtering = TextFiltering()
        read_from_readable = 'screen capture'
    else:
        delay_secs = config.get_general('delay_secs')
        delete_images = config.get_general('delete_images')
-
+        read_from_path = Path(read_from) if read_from not in non_path_inputs else Path(read_from_secondary)
-        read_from = Path(read_from)
+        if not read_from_path.is_dir():
-        if not read_from.is_dir():
+            raise ValueError('read_from and read_from_secondary must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
-            raise ValueError('read_from must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
+        directory_watcher_thread = DirectoryWatcher(read_from_path)
-
+        directory_watcher_thread.start()
-        allowed_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')
+        read_from_readable.append(f'directory {read_from_path}')
        old_paths = set()
        for path in read_from.iterdir():
            if path.suffix.lower() in allowed_extensions:
                old_paths.add(get_path_key(path))
        read_from_readable = f'directory {read_from}'
    if len(key_combos) > 0:
        key_combo_listener = keyboard.GlobalHotKeys(key_combos)
@@ -955,99 +978,62 @@ def run():
            raise ValueError('write_to must be either "websocket", "clipboard" or a path to a text file')
        write_to_readable = f'file {write_to}'
    process_queue = (any(i in ('clipboard', 'websocket', 'unixsocket') for i in (read_from, read_from_secondary)) or read_from_path or screen_capture_on_combo)
    process_screenshots = 'screencapture' in (read_from, read_from_secondary) and not screen_capture_on_combo
    signal.signal(signal.SIGINT, signal_handler)
    if (not process_screenshots) and auto_pause != 0:
        auto_pause_handler = AutopauseTimer(auto_pause)
    user_input_thread = threading.Thread(target=user_input_thread_run, daemon=True)
    user_input_thread.start()
-    logger.opt(ansi=True).info(f"Reading from {read_from_readable}, writing to {write_to_readable} using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
+    logger.opt(ansi=True).info(f"Reading from {' and '.join(read_from_readable)}, writing to {write_to_readable} using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
    while not terminated:
-        sleep_time = 0
+        start_time = time.time()
-        if read_from == 'websocket':
+        img = None
-            while True:
+        filter_img = False
-                try:
+
-                    item = websocket_queue.get(timeout=0.5)
+        if process_queue:
-                except queue.Empty:
+            try:
-                    break
+                img, filter_img = image_queue.get(timeout=0.1)
-                else:
+            except queue.Empty:
-                    if not paused:
+                pass
-                        img = Image.open(io.BytesIO(item))
+
-                        process_and_write_results(img, None, None)
+        if (not img) and process_screenshots:
-        elif read_from == 'unixsocket':
+            if (not paused) and screencapture_window_active and screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
-            while True:
+                img = take_screenshot()
-                try:
+                filter_img = True
-                    item = unixsocket_queue.get(timeout=0.5)
+                last_screenshot_time = time.time()
-                except queue.Empty:
+
-                    break
+        if img == 0:
-                else:
+            on_window_closed(False)
-                    img = Image.open(io.BytesIO(item))
+            terminated = True
-                    process_and_write_results(img, None, None)
+            break
-        elif read_from == 'clipboard':
+        elif img:
-            while True:
+            if filter_img:
                try:
                    item = clipboard_queue.get(timeout=0.5)
                except queue.Empty:
                    break
                else:
                    img = item if isinstance(item, Image.Image) else Image.open(io.BytesIO(item))
                    process_and_write_results(img, None, None)               
        elif read_from == 'screencapture':
            img = None
            if screen_capture_on_combo:
                try:
                    img = screenshot_queue.get(timeout=0.5)
                except queue.Empty:
                    pass
                else:
                    if not img:
                        on_window_closed(False)
                        terminated = True
                        break
            else:
                sleep_time = 0.5
                if (not paused) and screencapture_window_active and screencapture_window_visible:
                    img = take_screenshot()
                    if not img:
                        on_window_closed(False)
                        terminated = True
                        break
                    sleep_time = screen_capture_delay_secs
            if img:
                res = process_and_write_results(img, last_result, filtering)
                if res:
                    last_result = (res, engine_index)
-        else:
+            else:
-            sleep_time = delay_secs
+                process_and_write_results(img, None, None)
-            for path in read_from.iterdir():
+            if isinstance(img, Path):
-                if path.suffix.lower() in allowed_extensions:
+                if delete_images:
-                    path_key = get_path_key(path)
+                    Path.unlink(img)
                    if path_key not in old_paths:
                        old_paths.add(path_key)
-                        if not paused:
+        elapsed_time = time.time() - start_time
-                            try:
+        if (not terminated) and elapsed_time < 0.1:
-                                img = Image.open(path)
+            time.sleep(0.1 - elapsed_time)
                                img.load()
                            except (UnidentifiedImageError, OSError) as e:
                                logger.warning(f'Error while reading file {path}: {e}')
                            else:
                                process_and_write_results(img, None, None)
                                img.close()
                                if delete_images:
                                    Path.unlink(path)
        if not terminated:
            time.sleep(sleep_time)
-    if read_from == 'websocket' or write_to == 'websocket':
+    if websocket_server_thread:
        websocket_server_thread.stop_server()
        websocket_server_thread.join()
-    if read_from == 'clipboard':
+    if clipboard_thread:
        if sys.platform == 'win32':
            win32api.PostThreadMessage(clipboard_thread.thread_id, win32con.WM_QUIT, 0, 0)
        clipboard_thread.join()
-    elif read_from == 'unixsocket':
+    if directory_watcher_thread:
        directory_watcher_thread.join()
    if unix_socket_server:
        unix_socket_server.shutdown()
        unix_socket_server_thread.join()
-    if len(key_combos) > 0:
+    if key_combo_listener:
        key_combo_listener.stop()
    if auto_pause_handler:
        auto_pause_handler.stop()