Add option to wait for frame stabilization (helps with slow text), allow screenshots on combo at the same time as periodic ones, lots of refactoring

2025-10-09 09:00:16 +02:00
parent 878f164533
commit be8afa6d45
3 changed files with 326 additions and 286 deletions
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -35,7 +35,7 @@ parser.add_argument('-i', '--ignore_flag', type=str2bool, nargs='?', const=True,
 parser.add_argument('-d', '--delete_images', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
                    help='Delete image files after processing when reading from a directory.')
 parser.add_argument('-n', '--notifications', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
-                    help='Show an operating system notification with the detected text. Will be ignored when reading with screen capture, unless screen_capture_combo is set.')
+                    help='Show an operating system notification with the detected text. Will be ignored when reading with screen capture and periodic screenshots.')
 parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS,
                    help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.')
 parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS,
@@ -45,11 +45,13 @@ parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.S
 parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS,
                    help='Area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
 parser.add_argument('-sd', '--screen_capture_delay_secs', type=float, default=argparse.SUPPRESS,
-                    help='Delay (in seconds) between screenshots when reading with screen capture.')
+                    help='Delay (in seconds) between screenshots when reading with screen capture. -1 to disable periodic screenshots.')
 parser.add_argument('-sw', '--screen_capture_only_active_windows', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
                    help="When reading with screen capture and screen_capture_area is a window name, only target the window while it's active.")
 parser.add_argument('-sf', '--screen_capture_frame_stabilization', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
                    help="When reading with screen capture, try waiting until text is stable before processing it.")
 parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
-                    help='When reading with screen capture, combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
+                    help='When reading with screen capture, combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
 parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS,
                    help='Two letter language code for filtering screencapture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).')
 parser.add_argument('-of', '--output_format', type=str, default=argparse.SUPPRESS,
@@ -82,8 +84,9 @@ class Config:
        'combo_pause': '',
        'combo_engine_switch': '',
        'screen_capture_area': '',
-        'screen_capture_delay_secs': 3,
+        'screen_capture_delay_secs': -1,
        'screen_capture_only_active_windows': True,
        'screen_capture_frame_stabilization': True,
        'screen_capture_combo': '',
        'screen_capture_old_macos_api': False,
        'language': 'ja',
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -494,13 +494,12 @@ class GoogleLensWeb:
        lens_object = pyjson5.loads(res.text.splitlines()[2])
-        res = ''
+        res = []
        text = lens_object[0][2][0][0]
        for paragraph in text:
            for line in paragraph[1]:
                for word in line[0]:
-                    res += word[1] + word[2]
+                    res.append(word[1] + word[2])
            res += '\n'
        x = (True, res)
@@ -734,10 +733,10 @@ class AppleVision:
            )
            success = handler.performRequests_error_([req], None)
-            res = ''
+            res = []
            if success[0]:
                for result in req.results():
-                    res += result.text() + '\n'
+                    res.append(result.text())
                x = (True, res)
            else:
                x = (False, 'Unknown error!')
@@ -1105,11 +1104,11 @@ class AzureImageAnalysis:
        except:
            return (False, 'Unknown error!')
-        res = ''
+        res = []
        if read_result.read:
            for block in read_result.read.blocks:
                for line in block.lines:
-                    res += line.text + '\n'
+                    res.append(line.text)
        else:
            return (False, 'Unknown error!')
@@ -1152,10 +1151,10 @@ class EasyOCR:
        if not img:
            return (False, 'Invalid image provided')
-        res = ''
+        res = []
        read_result = self.model.readtext(self._preprocess(img), detail=0)
        for text in read_result:
-            res += text + '\n'
+            res.append(text)
        x = (True, res)
@@ -1216,11 +1215,11 @@ class RapidOCR:
        if not img:
            return (False, 'Invalid image provided')
-        res = ''
+        res = []
        read_results = self.model(self._preprocess(img))
        if read_results:
            for read_result in read_results.txts:
-                res += read_result + '\n'
+                res.append(read_result)
        x = (True, res)
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -304,6 +304,13 @@ class RequestHandler(socketserver.BaseRequestHandler):
 class TextFiltering:
    def __init__(self):
        self.language = config.get_general('language')
        self.frame_stabilization = config.get_general('screen_capture_frame_stabilization')
        self.last_frame_data = None
        self.stable_frame_data = None
        self.last_frame_text = None
        self.stable_frame_text = None
        self.processed_stable_frame = False
        self.cj_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
        self.regex = self.get_regex()
        self.kana_variants = {
            'ぁ': ['ぁ', 'あ'], 'あ': ['ぁ', 'あ'],
@@ -330,7 +337,7 @@ class TextFiltering:
    def get_regex(self):
        if self.language == 'ja':
-            return re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
+            return self.cj_regex
        elif self.language == 'zh':
            return re.compile(r'[\u4E00-\u9FFF]')
        elif self.language == 'ko':
@@ -354,16 +361,270 @@ class TextFiltering:
        converted_text = ''.join(self.kana_variants.get(char, [char])[-1] for char in text)
        return converted_text
    def _get_line_text(self, line):
        if line.text is not None:
            return line.text
        text_parts = []
        for w in line.words:
            text_parts.append(w.text)
            if w.separator is not None:
                text_parts.append(w.separator)
            else:
                text_parts.append(' ')
        return ''.join(text_parts)
    def _normalize_line_for_comparison(self, line_text):
        if not line_text:
            return ''
        filtered_text = ''.join(self.regex.findall(line_text))
        if self.language == 'ja':
            filtered_text = self.convert_small_kana_to_big(filtered_text)
        return filtered_text
    def _compare_text(self, current_text, prev_text, threshold=82):
        if current_text in prev_text:
            return True
        if len(prev_text) > len(current_text):
            return fuzz.partial_ratio(current_text, prev_text) >= threshold
        return fuzz.ratio(current_text, prev_text) >= threshold
    def _find_changed_lines(self, current_result):
        if (self.last_frame_data is None or self.stable_frame_data is None or
            (self.stable_frame_data and (current_result.image_properties.width != self.stable_frame_data.image_properties.width or
            current_result.image_properties.height != self.stable_frame_data.image_properties.height))):
            self.stable_frame_data = copy.deepcopy(current_result)
            self.last_frame_data = copy.deepcopy(current_result)
            changed_lines = []
            for p in current_result.paragraphs:
                changed_lines.extend(p.lines)
            return changed_lines
        if not self.frame_stabilization:
            changed_lines = self._find_changed_lines_impl(current_result, self.last_frame_data)
            self.last_frame_data = copy.deepcopy(current_result)
            return changed_lines
        frames_match = self._find_changed_lines_impl(current_result, self.last_frame_data) == []
        logger.debug(f"Frames match: '{frames_match}'")
        if frames_match:
            if self.processed_stable_frame:
                return []
            changed_lines = self._find_changed_lines_impl(current_result, self.stable_frame_data)
            self.processed_stable_frame = True
            self.stable_frame_data = copy.deepcopy(current_result)
            return changed_lines
        else:
            self.last_frame_data = copy.deepcopy(current_result)
            self.processed_stable_frame = False
            return []
    def _find_changed_lines_impl(self, current_result, previous_result):
        changed_lines = []
        current_lines = []
        previous_lines = []
        for p in current_result.paragraphs:
            current_lines.extend(p.lines)
        if len(current_lines) == 0:
            return []
        for p in previous_result.paragraphs:
            previous_lines.extend(p.lines)
        all_previous_text_spliced = []
        for prev_line in previous_lines:
            prev_text = self._get_line_text(prev_line)
            prev_text = self._normalize_line_for_comparison(prev_text)
            all_previous_text_spliced.append(prev_text)
        all_previous_text = ''.join(all_previous_text_spliced)
        logger.debug(f"Previous text: '{all_previous_text_spliced}'")
        first = True
        for current_line in current_lines:
            current_text = self._get_line_text(current_line)
            current_text = self._normalize_line_for_comparison(current_text)
            if not current_text:
                continue
            # For the first line, check if it contains the end of previous text
            if first and all_previous_text:
                overlap = self._find_overlap(all_previous_text, current_text)
                if overlap and len(current_text) > len(overlap):
                    logger.debug(f"Found overlap: '{overlap}'")
                    changed_lines.append(current_line)
                    first = False
                    continue
            if len(current_text) < 3:
                text_similar = current_text in all_previous_text_spliced
            else:
                text_similar = self._compare_text(current_text, all_previous_text)
            logger.debug(f"Current line: '{current_text}' Similar: '{text_similar}'")
            if not text_similar:
                changed_lines.append(current_line)
                if len(current_text) >= 3:
                    first = False
        return changed_lines
    def _find_changed_lines_text(self, current_result, two_pass_processing_active=False):
        if not self.frame_stabilization or two_pass_processing_active:
            if self.last_frame_text:
                changed_lines = self._find_changed_lines_text_impl(current_result, self.last_frame_text, True)
                self.last_frame_text = current_result
                return changed_lines
            else:
                self.last_frame_text = current_result
                return current_result
        if self.last_frame_text is None or self.stable_frame_text is None:
            self.stable_frame_text = current_result
            self.last_frame_text = current_result
            return current_result
        frames_match = self._find_changed_lines_text_impl(current_result, self.last_frame_text, False) == []
        logger.debug(f"Frames match: '{frames_match}'")
        if frames_match:
            if self.processed_stable_frame:
                return []
            changed_lines = self._find_changed_lines_text_impl(current_result, self.stable_frame_text, True)
            self.processed_stable_frame = True
            self.stable_frame_text = current_result
            return changed_lines
        else:
            self.last_frame_text = current_result
            self.processed_stable_frame = False
            return []
    def _find_changed_lines_text_impl(self, current_result, previous_stable_text, filtering):
        if len(current_result) == 0:
            return []
        changed_lines = []
        all_previous_text_spliced = []
        for prev_line in previous_stable_text:
            prev_text = self._normalize_line_for_comparison(prev_line)
            all_previous_text_spliced.append(prev_text)
        all_previous_text = ''.join(all_previous_text_spliced)
        logger.debug(f"Previous text: '{all_previous_text_spliced}'")
        first = True
        for current_line in current_result:
            current_text = self._normalize_line_for_comparison(current_line)
            if not current_text:
                continue
            # For the first line, check if it contains the end of previous text
            if filtering and first and all_previous_text:
                overlap = self._find_overlap(all_previous_text, current_text)
                if overlap and len(current_text) > len(overlap):
                    logger.debug(f"Found overlap: '{overlap}'")
                    current_line = self._cut_at_overlap(current_line, overlap)
                    logger.debug(f"After cutting: '{current_line}'")
                    changed_lines.append(current_line)
                    first = False
                    continue
            if len(current_text) < 3:
                text_similar = current_text in all_previous_text_spliced
            else:
                text_similar = self._compare_text(current_text, all_previous_text)
            logger.debug(f"Current line: '{current_text}' Similar: '{text_similar}'")
            if not text_similar:
                changed_lines.append(current_line)
                if len(current_text) >= 3:
                    first = False
        return changed_lines
    def _find_overlap(self, previous_text, current_text):
        min_overlap_length = 3
        max_overlap_length = min(len(previous_text), len(current_text))
        for overlap_length in range(max_overlap_length, min_overlap_length - 1, -1):
            previous_end = previous_text[-overlap_length:]
            current_start = current_text[:overlap_length]
            if previous_end == current_start:
                return previous_end
        return None
    def _cut_at_overlap(self, current_line, overlap):
        pattern_parts = []
        for char in overlap:
            if char in self.kana_variants:
                variants = self.kana_variants[char]
                pattern_parts.append(f'[{"".join(variants)}]')
            else:
                pattern_parts.append(re.escape(char))
        overlap_pattern = r'.*?'.join(pattern_parts)
        full_pattern = r'^.*?' + overlap_pattern
        logger.debug(f"Cut regex: '{full_pattern}'")
        match = re.search(full_pattern, current_line)
        if match:
            cut_position = match.end()
            return current_line[cut_position:]
        return current_line
    def _create_changed_regions_image(self, pil_image, changed_lines, margin=5):
        img_width, img_height = pil_image.size
        regions = []
        for line in changed_lines:
            bbox = line.bounding_box
            x1 = (bbox.center_x - bbox.width/2) * img_width - margin
            y1 = (bbox.center_y - bbox.height/2) * img_height - margin
            x2 = (bbox.center_x + bbox.width/2) * img_width + margin
            y2 = (bbox.center_y + bbox.height/2) * img_height + margin
            x1 = max(0, int(x1))
            y1 = max(0, int(y1))
            x2 = min(img_width, int(x2))
            y2 = min(img_height, int(y2))
            if x2 > x1 and y2 > y1:
                regions.append((x1, y1, x2, y2))
        if not regions:
            return None
        overall_x1 = min(x1 for x1, y1, x2, y2 in regions)
        overall_y1 = min(y1 for x1, y1, x2, y2 in regions)
        overall_x2 = max(x2 for x1, y1, x2, y2 in regions)
        overall_y2 = max(y2 for x1, y1, x2, y2 in regions)
        result_image = pil_image.crop((overall_x1, overall_y1, overall_x2, overall_y2))
        return result_image
 class ScreenshotThread(threading.Thread):
-    def __init__(self, screen_capture_on_combo):
+    def __init__(self):
        super().__init__(daemon=True)
        screen_capture_area = config.get_general('screen_capture_area')
        self.macos_window_tracker_instance = None
        self.windows_window_tracker_instance = None
        self.screencapture_window_active = True
        self.screencapture_window_visible = True
        self.use_periodic_queue = not screen_capture_on_combo
        if screen_capture_area == '':
            self.screencapture_mode = 0
        elif screen_capture_area.startswith('screen_'):
@@ -460,6 +721,7 @@ class ScreenshotThread(threading.Thread):
                logger.opt(ansi=True).info(f'Selected window: {window_title}')
            else:
                raise ValueError('Window capture is only currently supported on Windows and macOS')
        self.is_combo_screenshot = False
    def get_windows_window_handle(self, window_title):
        def callback(hwnd, window_title_part):
@@ -568,10 +830,11 @@ class ScreenshotThread(threading.Thread):
            on_window_closed(False)
    def write_result(self, result):
-        if self.use_periodic_queue:
+        if self.is_combo_screenshot:
-            periodic_screenshot_queue.put(result)
+            self.is_combo_screenshot = False
        else:
            image_queue.put((result, True))
        else:
            periodic_screenshot_queue.put(result)
    def run(self):
        if self.screencapture_mode != 2:
@@ -681,260 +944,28 @@ class AutopauseTimer:
 class OutputResult:
-    def __init__(self, init_filtering):
+    def __init__(self):
-        self.filtering = TextFiltering() if init_filtering else None
+        self.filtering = TextFiltering()
        self.cj_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
        self.previous_result = None
        self.previous_result_text = None
    def _coordinate_format_to_string(self, result_data):
        full_text_parts = []
        for p in result_data.paragraphs:
            for l in p.lines:
                full_text_parts.append(self._get_line_text(l))
                full_text_parts.append('\n')
        return ''.join(full_text_parts)
    def _post_process(self, text, strip_spaces):
-        is_cj_text = self.cj_regex.search(text)
+        is_cj_text = self.filtering.cj_regex.search(''.join(text))
        line_separator = '' if strip_spaces else ' '
        if is_cj_text:
-            text = line_separator.join([''.join(i.split()) for i in text.splitlines()])
+            text = line_separator.join([''.join(i.split()) for i in text])
        else:
-            text = line_separator.join([re.sub(r'\s+', ' ', i).strip() for i in text.splitlines()])
+            text = line_separator.join([re.sub(r'\s+', ' ', i).strip() for i in text])
        text = text.replace('…', '...')
        text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
        if is_cj_text:
            text = jaconv.h2z(text, ascii=True, digit=True)
        return text
-    def _get_line_text(self, line):
+    def _extract_lines_from_result(self, result_data):
-        if line.text is not None:
+        lines = []
-            return line.text
+        for p in result_data.paragraphs:
-        text_parts = []
+            for l in p.lines:
-        for w in line.words:
+                lines.append(self.filtering._get_line_text(l))
-            text_parts.append(w.text)
+        return lines
            if w.separator is not None:
                text_parts.append(w.separator)
            else:
                text_parts.append(' ')
        return ''.join(text_parts)
    def _compare_text(self, current_text, prev_text, threshold=82):
        if current_text in prev_text:
            return True
        if len(prev_text) > len(current_text):
            return fuzz.partial_ratio(current_text, prev_text) >= threshold
        return fuzz.ratio(current_text, prev_text) >= threshold
    def _find_changed_lines(self, current_result, previous_result):
        changed_lines = []
        # If no previous result, all lines are considered changed
        if previous_result is None:
            for p in current_result.paragraphs:
                changed_lines.extend(p.lines)
            return changed_lines
        # Check if image sizes are different - if so, treat all lines as changed
        if (current_result.image_properties.width != previous_result.image_properties.width or
            current_result.image_properties.height != previous_result.image_properties.height):
            for p in current_result.paragraphs:
                changed_lines.extend(p.lines)
            return changed_lines
        current_lines = []
        previous_lines = []
        for p in current_result.paragraphs:
            current_lines.extend(p.lines)
        for p in previous_result.paragraphs:
            previous_lines.extend(p.lines)
        all_previous_text_spliced = []
        for prev_line in previous_lines:
            prev_text = self._get_line_text(prev_line)
            prev_text = ''.join(self.filtering.regex.findall(prev_text))
            if self.filtering.language == 'ja':
                prev_text = self.filtering.convert_small_kana_to_big(prev_text)
            all_previous_text_spliced.append(prev_text)
        all_previous_text = ''.join(all_previous_text_spliced)
        logger.debug(f"Previous text: '{all_previous_text_spliced}'")
        first = True
        for current_line in current_lines:
            current_text = self._get_line_text(current_line)
            current_text = ''.join(self.filtering.regex.findall(current_text))
            if not current_text:
                continue
            if self.filtering.language == 'ja':
                current_text = self.filtering.convert_small_kana_to_big(current_text)
            # For the first line, check if it contains the end of previous text
            if first and all_previous_text:
                overlap = self._find_overlap(all_previous_text, current_text)
                if overlap and len(current_text) > len(overlap):
                    logger.debug(f"Found overlap: '{overlap}'")
                    changed_lines.append(current_line)
                    first = False
                    continue
            if len(current_text) < 3:
                text_similar = current_text in all_previous_text_spliced
            else:
                text_similar = self._compare_text(current_text, all_previous_text)
            logger.debug(f"Current line: '{current_text}' Similar: '{text_similar}'")
            if not text_similar:
                changed_lines.append(current_line)
                if len(current_text) >= 3:
                    first = False
        return changed_lines
    def _find_overlap(self, previous_text, current_text):
        """Find the overlapping portion between the end of previous_text and start of current_text."""
        # Try different overlap lengths, starting from the maximum possible
        min_overlap_length = 3  # Minimum overlap to consider meaningful
        max_overlap_length = min(len(previous_text), len(current_text))
        for overlap_length in range(max_overlap_length, min_overlap_length - 1, -1):
            previous_end = previous_text[-overlap_length:]
            current_start = current_text[:overlap_length]
            if previous_end == current_start:
                return previous_end
        return None
    def _cut_at_overlap(self, current_line, overlap):
        pattern_parts = []
        for char in overlap:
            # Check if character is kana and has small/big variants
            if char in self.filtering.kana_variants:
                # Use character class that matches both small and big variants
                variants = self.filtering.kana_variants[char]
                pattern_parts.append(f'[{"".join(variants)}]')
            else:
                # Escape regex special characters for regular characters
                pattern_parts.append(re.escape(char))
        # Create pattern: overlap characters with any characters (0 or more) between them
        overlap_pattern = r'.*?'.join(pattern_parts)
        # Also allow any characters at the beginning
        full_pattern = r'^.*?' + overlap_pattern
        logger.debug(f"Cut regex: '{full_pattern}'")
        # Find the match
        match = re.search(full_pattern, current_line)
        if match:
            # Cut after the matched overlapping portion
            cut_position = match.end()
            return current_line[cut_position:]
        return current_line
    def _find_changed_lines_text(self, current_result):
        # Split both results into lines
        current_lines = current_result.split('\n')
        # If no previous result, all lines are considered changed
        if self.previous_result_text is None:
            self.previous_result_text = current_lines[-10:]  # Keep only last 10 lines
            return current_result
        changed_lines = []
        all_previous_text_spliced = []
        for prev_line in self.previous_result_text:
            prev_text = ''.join(self.filtering.regex.findall(prev_line))
            if self.filtering.language == 'ja':
                prev_text = self.filtering.convert_small_kana_to_big(prev_text)
            all_previous_text_spliced.append(prev_text)
        all_previous_text = ''.join(all_previous_text_spliced)
        logger.debug(f"Previous text: '{all_previous_text_spliced}'")
        first = True
        # Check each current line against the combined previous text
        for current_line in current_lines:
            current_text = ''.join(self.filtering.regex.findall(current_line))
            if not current_text:
                continue
            if self.filtering.language == 'ja':
                current_text = self.filtering.convert_small_kana_to_big(current_text)
            # For the first line, check if it contains the end of previous text
            if first and all_previous_text:
                overlap = self._find_overlap(all_previous_text, current_text)
                if overlap and len(current_text) > len(overlap):
                    logger.debug(f"Found overlap: '{overlap}'")
                    # Cut the current_line to remove the overlapping part
                    current_line = self._cut_at_overlap(current_line, overlap)
                    logger.debug(f"After cutting: '{current_line}'")
                    changed_lines.append(current_line)
                    first = False
                    continue
            if len(current_text) < 3:
                text_similar = current_text in all_previous_text_spliced
            else:
                text_similar = self._compare_text(current_text, all_previous_text)
            logger.debug(f"Current line: '{current_text}' Similar: '{text_similar}'")
            if not text_similar:
                changed_lines.append(current_line)
                if len(current_text) >= 3:
                    first = False
        # Update cache with current lines, keeping only the last 10
        self.previous_result_text.extend(current_lines)
        self.previous_result_text = self.previous_result_text[-10:]
        return '\n'.join(changed_lines)
    def _create_changed_regions_image(self, pil_image, changed_lines, margin=5):
        img_width, img_height = pil_image.size
        # Convert normalized coordinates to pixel coordinates
        regions = []
        for line in changed_lines:
            bbox = line.bounding_box
            # Convert center-based bbox to corner-based
            x1 = (bbox.center_x - bbox.width/2) * img_width - margin
            y1 = (bbox.center_y - bbox.height/2) * img_height - margin
            x2 = (bbox.center_x + bbox.width/2) * img_width + margin
            y2 = (bbox.center_y + bbox.height/2) * img_height + margin
            # Ensure coordinates are within image bounds
            x1 = max(0, int(x1))
            y1 = max(0, int(y1))
            x2 = min(img_width, int(x2))
            y2 = min(img_height, int(y2))
            if x2 > x1 and y2 > y1: #Only add valid regions
                regions.append((x1, y1, x2, y2))
        if not regions:
            return None
        # Calculate the bounding box that contains all regions
        overall_x1 = min(x1 for x1, y1, x2, y2 in regions)
        overall_y1 = min(y1 for x1, y1, x2, y2 in regions)
        overall_x2 = max(x2 for x1, y1, x2, y2 in regions)
        overall_y2 = max(y2 for x1, y1, x2, y2 in regions)
        # Crop the single rectangle containing all changed regions
        result_image = pil_image.crop((overall_x1, overall_y1, overall_x2, overall_y2))
        return result_image
    def __call__(self, img_or_path, filter_text, notify):
        if auto_pause_handler and not filter_text:
@@ -943,6 +974,7 @@ class OutputResult:
        output_format = config.get_general('output_format')
        engine_color = config.get_general('engine_color')
        engine_instance = engine_instances[engine_index]
        two_pass_processing_active = False
        if filter_text and engine_index_2 != -1 and engine_index_2 != engine_index:
            engine_instance_2 = engine_instances[engine_index_2]
@@ -953,15 +985,14 @@ class OutputResult:
            if not res2:
                logger.opt(ansi=True).warning(f'<{engine_color}>{engine_instance_2.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {result_data_2}')
            else:
-                changed_lines = self._find_changed_lines(result_data_2, self.previous_result)
+                two_pass_processing_active = True
                changed_lines = self.filtering._find_changed_lines(result_data_2)
-                self.previous_result = copy.deepcopy(result_data_2)
+                if changed_lines:
                if len(changed_lines) > 0:
                    logger.opt(ansi=True).info(f"<{engine_color}>{engine_instance_2.readable_name}</{engine_color}> found {len(changed_lines)} changed line(s) in {end_time - start_time:0.03f}s, re-OCRing with <{engine_color}>{engine_instance.readable_name}</{engine_color}>")
                    if output_format != 'json':
-                        changed_regions_image = self._create_changed_regions_image(img_or_path, changed_lines)
+                        changed_regions_image = self.filtering._create_changed_regions_image(img_or_path, changed_lines)
                        if changed_regions_image:
                            img_or_path = changed_regions_image
@@ -984,7 +1015,7 @@ class OutputResult:
        result_data_text = None
        if isinstance(result_data, OcrResult):
-            unprocessed_text = self._coordinate_format_to_string(result_data)
+            unprocessed_text = self._extract_lines_from_result(result_data)
            if output_format == 'json':
                result_dict = asdict(result_data)
@@ -995,15 +1026,17 @@ class OutputResult:
        else:
            result_data_text = result_data
-        if result_data_text:
+        if result_data_text != None:
            if output_format == 'json':
                logger.warning(f"Engine '{engine_instance.name}' does not support JSON output. Falling back to text.")
            if filter_text:
-                text_to_process = self._find_changed_lines_text(result_data_text)
+                text_to_process = self.filtering._find_changed_lines_text(result_data_text, two_pass_processing_active)
                if text_to_process == []:
                    return
                output_string = self._post_process(text_to_process, True)
            else:
                output_string = self._post_process(result_data_text, False)
            log_message = output_string
            if output_format == 'json':
                logger.warning(f"Engine '{engine_instance.name}' does not support JSON output. Falling back to text.")
        if verbosity != 0:
            if verbosity < -1:
@@ -1120,6 +1153,7 @@ def on_window_closed(alive):
 def on_screenshot_combo():
    screenshot_thread.is_combo_screenshot = True
    screenshot_event.set()
@@ -1195,13 +1229,13 @@ def run():
    directory_watcher_thread = None
    unix_socket_server = None
    key_combo_listener = None
    init_filtering = False
    auto_pause_handler = None
    engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
    engine_index_2 = engine_keys.index(engine_secondary) if engine_secondary != '' else -1
    engine_color = config.get_general('engine_color')
    combo_pause = config.get_general('combo_pause')
    combo_engine_switch = config.get_general('combo_engine_switch')
    screen_capture_periodic = False
    screen_capture_on_combo = False
    notifier = DesktopNotifierSync()
    image_queue = queue.Queue()
@@ -1226,13 +1260,13 @@ def run():
        if screen_capture_combo != '':
            screen_capture_on_combo = True
            key_combos[screen_capture_combo] = on_screenshot_combo
-        else:
+        if screen_capture_delay_secs != -1:
            global periodic_screenshot_queue
            periodic_screenshot_queue = queue.Queue()
            screen_capture_periodic = True
        screenshot_event = threading.Event()
-        screenshot_thread = ScreenshotThread(screen_capture_on_combo)
+        screenshot_thread = ScreenshotThread()
        screenshot_thread.start()
        init_filtering = True
        read_from_readable.append('screen capture')
    if 'websocket' in (read_from, read_from_secondary):
        read_from_readable.append('websocket')
@@ -1261,7 +1295,7 @@ def run():
        directory_watcher_thread.start()
        read_from_readable.append(f'directory {read_from_path}')
-    output_result = OutputResult(init_filtering)
+    output_result = OutputResult()
    if len(key_combos) > 0:
        key_combo_listener = keyboard.GlobalHotKeys(key_combos)
@@ -1275,9 +1309,8 @@ def run():
        write_to_readable = f'file {write_to}'
    process_queue = (any(i in ('clipboard', 'websocket', 'unixsocket') for i in (read_from, read_from_secondary)) or read_from_path or screen_capture_on_combo)
    process_screenshots = 'screencapture' in (read_from, read_from_secondary) and not screen_capture_on_combo
    signal.signal(signal.SIGINT, signal_handler)
-    if (not process_screenshots) and auto_pause != 0:
+    if (not screen_capture_periodic) and auto_pause != 0:
        auto_pause_handler = AutopauseTimer(auto_pause)
    user_input_thread = threading.Thread(target=user_input_thread_run, daemon=True)
    user_input_thread.start()
@@ -1299,17 +1332,22 @@ def run():
        if process_queue:
            try:
                img, filter_text = image_queue.get(timeout=0.1)
                if screen_capture_periodic:
                    filter_text = False
                notify = True
            except queue.Empty:
                pass
-        if (not img) and process_screenshots:
+        if (not img) and screen_capture_periodic:
            if (not paused) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
                screenshot_event.set()
-                img = periodic_screenshot_queue.get()
+                try:
                    img = periodic_screenshot_queue.get(timeout=0.1)
                    filter_text = True
                    notify = False
                    last_screenshot_time = time.time()
                except queue.Empty:
                    pass
        if img == 0:
            on_window_closed(False)