Add option to wait for frame stabilization (helps with slow text), allow screenshots on combo at the same time as periodic ones, lots of refactoring

2025-10-09 09:00:16 +02:00
parent 878f164533
commit be8afa6d45
3 changed files with 326 additions and 286 deletions
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -35,7 +35,7 @@ parser.add_argument('-i', '--ignore_flag', type=str2bool, nargs='?', const=True,
 parser.add_argument('-d', '--delete_images', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
                    help='Delete image files after processing when reading from a directory.')
 parser.add_argument('-n', '--notifications', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
-                    help='Show an operating system notification with the detected text. Will be ignored when reading with screen capture, unless screen_capture_combo is set.')
+                    help='Show an operating system notification with the detected text. Will be ignored when reading with screen capture and periodic screenshots.')
 parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS,
                    help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.')
 parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS,
@@ -45,11 +45,13 @@ parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.S
 parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS,
                    help='Area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
 parser.add_argument('-sd', '--screen_capture_delay_secs', type=float, default=argparse.SUPPRESS,
-                    help='Delay (in seconds) between screenshots when reading with screen capture.')
+                    help='Delay (in seconds) between screenshots when reading with screen capture. -1 to disable periodic screenshots.')
 parser.add_argument('-sw', '--screen_capture_only_active_windows', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
                    help="When reading with screen capture and screen_capture_area is a window name, only target the window while it's active.")
+parser.add_argument('-sf', '--screen_capture_frame_stabilization', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
+                    help="When reading with screen capture, try waiting until text is stable before processing it.")
 parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
-                    help='When reading with screen capture, combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
+                    help='When reading with screen capture, combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
 parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS,
                    help='Two letter language code for filtering screencapture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).')
 parser.add_argument('-of', '--output_format', type=str, default=argparse.SUPPRESS,
@@ -82,8 +84,9 @@ class Config:
        'combo_pause': '',
        'combo_engine_switch': '',
        'screen_capture_area': '',
-        'screen_capture_delay_secs': 3,
+        'screen_capture_delay_secs': -1,
        'screen_capture_only_active_windows': True,
+        'screen_capture_frame_stabilization': True,
        'screen_capture_combo': '',
        'screen_capture_old_macos_api': False,
        'language': 'ja',
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -494,13 +494,12 @@ class GoogleLensWeb:

        lens_object = pyjson5.loads(res.text.splitlines()[2])

-        res = ''
+        res = []
        text = lens_object[0][2][0][0]
        for paragraph in text:
            for line in paragraph[1]:
                for word in line[0]:
-                    res += word[1] + word[2]
-            res += '\n'
+                    res.append(word[1] + word[2])

        x = (True, res)

@@ -734,10 +733,10 @@ class AppleVision:
            )

            success = handler.performRequests_error_([req], None)
-            res = ''
+            res = []
            if success[0]:
                for result in req.results():
-                    res += result.text() + '\n'
+                    res.append(result.text())
                x = (True, res)
            else:
                x = (False, 'Unknown error!')
@@ -1105,11 +1104,11 @@ class AzureImageAnalysis:
        except:
            return (False, 'Unknown error!')

-        res = ''
+        res = []
        if read_result.read:
            for block in read_result.read.blocks:
                for line in block.lines:
-                    res += line.text + '\n'
+                    res.append(line.text)
        else:
            return (False, 'Unknown error!')

@@ -1152,10 +1151,10 @@ class EasyOCR:
        if not img:
            return (False, 'Invalid image provided')

-        res = ''
+        res = []
        read_result = self.model.readtext(self._preprocess(img), detail=0)
        for text in read_result:
-            res += text + '\n'
+            res.append(text)

        x = (True, res)

@@ -1216,11 +1215,11 @@ class RapidOCR:
        if not img:
            return (False, 'Invalid image provided')

-        res = ''
+        res = []
        read_results = self.model(self._preprocess(img))
        if read_results:
            for read_result in read_results.txts:
-                res += read_result + '\n'
+                res.append(read_result)

        x = (True, res)

--- a/owocr/run.py
+++ b/owocr/run.py
@@ -304,6 +304,13 @@ class RequestHandler(socketserver.BaseRequestHandler):
 class TextFiltering:
    def __init__(self):
        self.language = config.get_general('language')
+        self.frame_stabilization = config.get_general('screen_capture_frame_stabilization')
+        self.last_frame_data = None
+        self.stable_frame_data = None
+        self.last_frame_text = None
+        self.stable_frame_text = None
+        self.processed_stable_frame = False
+        self.cj_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
        self.regex = self.get_regex()
        self.kana_variants = {
            'ぁ': ['ぁ', 'あ'], 'あ': ['ぁ', 'あ'],
@@ -330,7 +337,7 @@ class TextFiltering:

    def get_regex(self):
        if self.language == 'ja':
-            return re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
+            return self.cj_regex
        elif self.language == 'zh':
            return re.compile(r'[\u4E00-\u9FFF]')
        elif self.language == 'ko':
@@ -354,16 +361,270 @@ class TextFiltering:
        converted_text = ''.join(self.kana_variants.get(char, [char])[-1] for char in text)
        return converted_text

+    def _get_line_text(self, line):
+        if line.text is not None:
+            return line.text
+        text_parts = []
+        for w in line.words:
+            text_parts.append(w.text)
+            if w.separator is not None:
+                text_parts.append(w.separator)
+            else:
+                text_parts.append(' ')
+        return ''.join(text_parts)
+
+    def _normalize_line_for_comparison(self, line_text):
+        if not line_text:
+            return ''
+        filtered_text = ''.join(self.regex.findall(line_text))
+        if self.language == 'ja':
+            filtered_text = self.convert_small_kana_to_big(filtered_text)
+        return filtered_text
+
+    def _compare_text(self, current_text, prev_text, threshold=82):
+        if current_text in prev_text:
+            return True
+        if len(prev_text) > len(current_text):
+            return fuzz.partial_ratio(current_text, prev_text) >= threshold
+        return fuzz.ratio(current_text, prev_text) >= threshold
+
+    def _find_changed_lines(self, current_result):
+        if (self.last_frame_data is None or self.stable_frame_data is None or
+            (self.stable_frame_data and (current_result.image_properties.width != self.stable_frame_data.image_properties.width or
+            current_result.image_properties.height != self.stable_frame_data.image_properties.height))):
+            self.stable_frame_data = copy.deepcopy(current_result)
+            self.last_frame_data = copy.deepcopy(current_result)
+
+            changed_lines = []
+            for p in current_result.paragraphs:
+                changed_lines.extend(p.lines)
+            return changed_lines
+
+        if not self.frame_stabilization:
+            changed_lines = self._find_changed_lines_impl(current_result, self.last_frame_data)
+            self.last_frame_data = copy.deepcopy(current_result)
+            return changed_lines
+
+        frames_match = self._find_changed_lines_impl(current_result, self.last_frame_data) == []
+
+        logger.debug(f"Frames match: '{frames_match}'")
+
+        if frames_match:
+            if self.processed_stable_frame:
+                return []
+            changed_lines = self._find_changed_lines_impl(current_result, self.stable_frame_data)
+            self.processed_stable_frame = True
+            self.stable_frame_data = copy.deepcopy(current_result)
+            return changed_lines
+        else:
+            self.last_frame_data = copy.deepcopy(current_result)
+            self.processed_stable_frame = False
+            return []
+
+    def _find_changed_lines_impl(self, current_result, previous_result):
+        changed_lines = []
+        current_lines = []
+        previous_lines = []
+
+        for p in current_result.paragraphs:
+            current_lines.extend(p.lines)
+        if len(current_lines) == 0:
+            return []
+
+        for p in previous_result.paragraphs:
+            previous_lines.extend(p.lines)
+
+        all_previous_text_spliced = []
+        for prev_line in previous_lines:
+            prev_text = self._get_line_text(prev_line)
+            prev_text = self._normalize_line_for_comparison(prev_text)
+            all_previous_text_spliced.append(prev_text)
+
+        all_previous_text = ''.join(all_previous_text_spliced)
+
+        logger.debug(f"Previous text: '{all_previous_text_spliced}'")
+
+        first = True
+        for current_line in current_lines:
+            current_text = self._get_line_text(current_line)
+            current_text = self._normalize_line_for_comparison(current_text)
+            if not current_text:
+                continue
+
+            # For the first line, check if it contains the end of previous text
+            if first and all_previous_text:
+                overlap = self._find_overlap(all_previous_text, current_text)
+                if overlap and len(current_text) > len(overlap):
+                    logger.debug(f"Found overlap: '{overlap}'")
+                    changed_lines.append(current_line)
+                    first = False
+                    continue
+
+            if len(current_text) < 3:
+                text_similar = current_text in all_previous_text_spliced
+            else:
+                text_similar = self._compare_text(current_text, all_previous_text)
+
+            logger.debug(f"Current line: '{current_text}' Similar: '{text_similar}'")
+
+            if not text_similar:
+                changed_lines.append(current_line)
+                if len(current_text) >= 3:
+                    first = False
+
+        return changed_lines
+
+    def _find_changed_lines_text(self, current_result, two_pass_processing_active=False):
+        if not self.frame_stabilization or two_pass_processing_active:
+            if self.last_frame_text:
+                changed_lines = self._find_changed_lines_text_impl(current_result, self.last_frame_text, True)
+                self.last_frame_text = current_result
+                return changed_lines
+            else:
+                self.last_frame_text = current_result
+                return current_result
+
+        if self.last_frame_text is None or self.stable_frame_text is None:
+            self.stable_frame_text = current_result
+            self.last_frame_text = current_result
+            return current_result
+
+        frames_match = self._find_changed_lines_text_impl(current_result, self.last_frame_text, False) == []
+
+        logger.debug(f"Frames match: '{frames_match}'")
+
+        if frames_match:
+            if self.processed_stable_frame:
+                return []
+            changed_lines = self._find_changed_lines_text_impl(current_result, self.stable_frame_text, True)
+            self.processed_stable_frame = True
+            self.stable_frame_text = current_result
+            return changed_lines
+        else:
+            self.last_frame_text = current_result
+            self.processed_stable_frame = False
+            return []
+
+    def _find_changed_lines_text_impl(self, current_result, previous_stable_text, filtering):
+        if len(current_result) == 0:
+            return []
+
+        changed_lines = []
+        all_previous_text_spliced = []
+
+        for prev_line in previous_stable_text:
+            prev_text = self._normalize_line_for_comparison(prev_line)
+            all_previous_text_spliced.append(prev_text)
+
+        all_previous_text = ''.join(all_previous_text_spliced)
+
+        logger.debug(f"Previous text: '{all_previous_text_spliced}'")
+
+        first = True
+        for current_line in current_result:
+            current_text = self._normalize_line_for_comparison(current_line)
+            if not current_text:
+                continue
+
+            # For the first line, check if it contains the end of previous text
+            if filtering and first and all_previous_text:
+                overlap = self._find_overlap(all_previous_text, current_text)
+                if overlap and len(current_text) > len(overlap):
+                    logger.debug(f"Found overlap: '{overlap}'")
+                    current_line = self._cut_at_overlap(current_line, overlap)
+                    logger.debug(f"After cutting: '{current_line}'")
+                    changed_lines.append(current_line)
+                    first = False
+                    continue
+
+            if len(current_text) < 3:
+                text_similar = current_text in all_previous_text_spliced
+            else:
+                text_similar = self._compare_text(current_text, all_previous_text)
+
+            logger.debug(f"Current line: '{current_text}' Similar: '{text_similar}'")
+
+            if not text_similar:
+                changed_lines.append(current_line)
+                if len(current_text) >= 3:
+                    first = False
+
+        return changed_lines
+
+    def _find_overlap(self, previous_text, current_text):
+        min_overlap_length = 3
+        max_overlap_length = min(len(previous_text), len(current_text))
+
+        for overlap_length in range(max_overlap_length, min_overlap_length - 1, -1):
+            previous_end = previous_text[-overlap_length:]
+            current_start = current_text[:overlap_length]
+
+            if previous_end == current_start:
+                return previous_end
+
+        return None
+
+    def _cut_at_overlap(self, current_line, overlap):
+        pattern_parts = []
+        for char in overlap:
+            if char in self.kana_variants:
+                variants = self.kana_variants[char]
+                pattern_parts.append(f'[{"".join(variants)}]')
+            else:
+                pattern_parts.append(re.escape(char))
+
+        overlap_pattern = r'.*?'.join(pattern_parts)
+        full_pattern = r'^.*?' + overlap_pattern
+
+        logger.debug(f"Cut regex: '{full_pattern}'")
+
+        match = re.search(full_pattern, current_line)
+        if match:
+            cut_position = match.end()
+            return current_line[cut_position:]
+
+        return current_line
+
+    def _create_changed_regions_image(self, pil_image, changed_lines, margin=5):
+        img_width, img_height = pil_image.size
+
+        regions = []
+        for line in changed_lines:
+            bbox = line.bounding_box
+            x1 = (bbox.center_x - bbox.width/2) * img_width - margin
+            y1 = (bbox.center_y - bbox.height/2) * img_height - margin
+            x2 = (bbox.center_x + bbox.width/2) * img_width + margin
+            y2 = (bbox.center_y + bbox.height/2) * img_height + margin
+
+            x1 = max(0, int(x1))
+            y1 = max(0, int(y1))
+            x2 = min(img_width, int(x2))
+            y2 = min(img_height, int(y2))
+
+            if x2 > x1 and y2 > y1:
+                regions.append((x1, y1, x2, y2))
+
+        if not regions:
+            return None
+
+        overall_x1 = min(x1 for x1, y1, x2, y2 in regions)
+        overall_y1 = min(y1 for x1, y1, x2, y2 in regions)
+        overall_x2 = max(x2 for x1, y1, x2, y2 in regions)
+        overall_y2 = max(y2 for x1, y1, x2, y2 in regions)
+
+        result_image = pil_image.crop((overall_x1, overall_y1, overall_x2, overall_y2))
+
+        return result_image
+

 class ScreenshotThread(threading.Thread):
-    def __init__(self, screen_capture_on_combo):
+    def __init__(self):
        super().__init__(daemon=True)
        screen_capture_area = config.get_general('screen_capture_area')
        self.macos_window_tracker_instance = None
        self.windows_window_tracker_instance = None
        self.screencapture_window_active = True
        self.screencapture_window_visible = True
-        self.use_periodic_queue = not screen_capture_on_combo
        if screen_capture_area == '':
            self.screencapture_mode = 0
        elif screen_capture_area.startswith('screen_'):
@@ -460,6 +721,7 @@ class ScreenshotThread(threading.Thread):
                logger.opt(ansi=True).info(f'Selected window: {window_title}')
            else:
                raise ValueError('Window capture is only currently supported on Windows and macOS')
+        self.is_combo_screenshot = False

    def get_windows_window_handle(self, window_title):
        def callback(hwnd, window_title_part):
@@ -568,10 +830,11 @@ class ScreenshotThread(threading.Thread):
            on_window_closed(False)

    def write_result(self, result):
-        if self.use_periodic_queue:
-            periodic_screenshot_queue.put(result)
-        else:
+        if self.is_combo_screenshot:
+            self.is_combo_screenshot = False
            image_queue.put((result, True))
+        else:
+            periodic_screenshot_queue.put(result)

    def run(self):
        if self.screencapture_mode != 2:
@@ -681,260 +944,28 @@ class AutopauseTimer:


 class OutputResult:
-    def __init__(self, init_filtering):
-        self.filtering = TextFiltering() if init_filtering else None
-        self.cj_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
-        self.previous_result = None
-        self.previous_result_text = None
-
-    def _coordinate_format_to_string(self, result_data):
-        full_text_parts = []
-        for p in result_data.paragraphs:
-            for l in p.lines:
-                full_text_parts.append(self._get_line_text(l))
-                full_text_parts.append('\n')
-        return ''.join(full_text_parts)
+    def __init__(self):
+        self.filtering = TextFiltering()

    def _post_process(self, text, strip_spaces):
-        is_cj_text = self.cj_regex.search(text)
+        is_cj_text = self.filtering.cj_regex.search(''.join(text))
        line_separator = '' if strip_spaces else ' '
        if is_cj_text:
-            text = line_separator.join([''.join(i.split()) for i in text.splitlines()])
+            text = line_separator.join([''.join(i.split()) for i in text])
        else:
-            text = line_separator.join([re.sub(r'\s+', ' ', i).strip() for i in text.splitlines()])
+            text = line_separator.join([re.sub(r'\s+', ' ', i).strip() for i in text])
        text = text.replace('…', '...')
        text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
        if is_cj_text:
            text = jaconv.h2z(text, ascii=True, digit=True)
        return text

-    def _get_line_text(self, line):
-        if line.text is not None:
-            return line.text
-        text_parts = []
-        for w in line.words:
-            text_parts.append(w.text)
-            if w.separator is not None:
-                text_parts.append(w.separator)
-            else:
-                text_parts.append(' ')
-        return ''.join(text_parts)
-
-    def _compare_text(self, current_text, prev_text, threshold=82):
-        if current_text in prev_text:
-            return True
-        if len(prev_text) > len(current_text):
-            return fuzz.partial_ratio(current_text, prev_text) >= threshold
-        return fuzz.ratio(current_text, prev_text) >= threshold
-
-    def _find_changed_lines(self, current_result, previous_result):
-        changed_lines = []
-
-        # If no previous result, all lines are considered changed
-        if previous_result is None:
-            for p in current_result.paragraphs:
-                changed_lines.extend(p.lines)
-            return changed_lines
-
-        # Check if image sizes are different - if so, treat all lines as changed
-        if (current_result.image_properties.width != previous_result.image_properties.width or
-            current_result.image_properties.height != previous_result.image_properties.height):
-            for p in current_result.paragraphs:
-                changed_lines.extend(p.lines)
-            return changed_lines
-
-        current_lines = []
-        previous_lines = []
-
-        for p in current_result.paragraphs:
-            current_lines.extend(p.lines)
-        for p in previous_result.paragraphs:
-            previous_lines.extend(p.lines)
-
-        all_previous_text_spliced = []
-        for prev_line in previous_lines:
-            prev_text = self._get_line_text(prev_line)
-            prev_text = ''.join(self.filtering.regex.findall(prev_text))
-            if self.filtering.language == 'ja':
-                prev_text = self.filtering.convert_small_kana_to_big(prev_text)
-            all_previous_text_spliced.append(prev_text)
-
-        all_previous_text = ''.join(all_previous_text_spliced)
-
-        logger.debug(f"Previous text: '{all_previous_text_spliced}'")
-
-        first = True
-        for current_line in current_lines:
-            current_text = self._get_line_text(current_line)
-            current_text = ''.join(self.filtering.regex.findall(current_text))
-            if not current_text:
-                continue
-            if self.filtering.language == 'ja':
-                current_text = self.filtering.convert_small_kana_to_big(current_text)
-
-            # For the first line, check if it contains the end of previous text
-            if first and all_previous_text:
-                overlap = self._find_overlap(all_previous_text, current_text)
-                if overlap and len(current_text) > len(overlap):
-                    logger.debug(f"Found overlap: '{overlap}'")
-                    changed_lines.append(current_line)
-                    first = False
-                    continue
-
-            if len(current_text) < 3:
-                text_similar = current_text in all_previous_text_spliced
-            else:
-                text_similar = self._compare_text(current_text, all_previous_text)
-
-            logger.debug(f"Current line: '{current_text}' Similar: '{text_similar}'")
-
-            if not text_similar:
-                changed_lines.append(current_line)
-                if len(current_text) >= 3:
-                    first = False
-
-        return changed_lines
-
-    def _find_overlap(self, previous_text, current_text):
-        """Find the overlapping portion between the end of previous_text and start of current_text."""
-        # Try different overlap lengths, starting from the maximum possible
-        min_overlap_length = 3  # Minimum overlap to consider meaningful
-        max_overlap_length = min(len(previous_text), len(current_text))
-
-        for overlap_length in range(max_overlap_length, min_overlap_length - 1, -1):
-            previous_end = previous_text[-overlap_length:]
-            current_start = current_text[:overlap_length]
-
-            if previous_end == current_start:
-                return previous_end
-
-        return None
-
-    def _cut_at_overlap(self, current_line, overlap):
-        pattern_parts = []
-        for char in overlap:
-            # Check if character is kana and has small/big variants
-            if char in self.filtering.kana_variants:
-                # Use character class that matches both small and big variants
-                variants = self.filtering.kana_variants[char]
-                pattern_parts.append(f'[{"".join(variants)}]')
-            else:
-                # Escape regex special characters for regular characters
-                pattern_parts.append(re.escape(char))
-
-        # Create pattern: overlap characters with any characters (0 or more) between them
-        overlap_pattern = r'.*?'.join(pattern_parts)
-
-        # Also allow any characters at the beginning
-        full_pattern = r'^.*?' + overlap_pattern
-
-        logger.debug(f"Cut regex: '{full_pattern}'")
-
-        # Find the match
-        match = re.search(full_pattern, current_line)
-        if match:
-            # Cut after the matched overlapping portion
-            cut_position = match.end()
-            return current_line[cut_position:]
-
-        return current_line
-
-    def _find_changed_lines_text(self, current_result):
-        # Split both results into lines
-        current_lines = current_result.split('\n')
-
-        # If no previous result, all lines are considered changed
-        if self.previous_result_text is None:
-            self.previous_result_text = current_lines[-10:]  # Keep only last 10 lines
-            return current_result
-
-        changed_lines = []
-        all_previous_text_spliced = []
-
-        for prev_line in self.previous_result_text:
-            prev_text = ''.join(self.filtering.regex.findall(prev_line))
-            if self.filtering.language == 'ja':
-                prev_text = self.filtering.convert_small_kana_to_big(prev_text)
-            all_previous_text_spliced.append(prev_text)
-
-        all_previous_text = ''.join(all_previous_text_spliced)
-
-        logger.debug(f"Previous text: '{all_previous_text_spliced}'")
-
-        first = True
-        # Check each current line against the combined previous text
-        for current_line in current_lines:
-            current_text = ''.join(self.filtering.regex.findall(current_line))
-            if not current_text:
-                continue
-            if self.filtering.language == 'ja':
-                current_text = self.filtering.convert_small_kana_to_big(current_text)
-
-            # For the first line, check if it contains the end of previous text
-            if first and all_previous_text:
-                overlap = self._find_overlap(all_previous_text, current_text)
-                if overlap and len(current_text) > len(overlap):
-                    logger.debug(f"Found overlap: '{overlap}'")
-                    # Cut the current_line to remove the overlapping part
-                    current_line = self._cut_at_overlap(current_line, overlap)
-                    logger.debug(f"After cutting: '{current_line}'")
-                    changed_lines.append(current_line)
-                    first = False
-                    continue
-
-            if len(current_text) < 3:
-                text_similar = current_text in all_previous_text_spliced
-            else:
-                text_similar = self._compare_text(current_text, all_previous_text)
-
-            logger.debug(f"Current line: '{current_text}' Similar: '{text_similar}'")
-
-            if not text_similar:
-                changed_lines.append(current_line)
-                if len(current_text) >= 3:
-                    first = False
-
-        # Update cache with current lines, keeping only the last 10
-        self.previous_result_text.extend(current_lines)
-        self.previous_result_text = self.previous_result_text[-10:]
-
-        return '\n'.join(changed_lines)
-
-    def _create_changed_regions_image(self, pil_image, changed_lines, margin=5):
-        img_width, img_height = pil_image.size
-
-        # Convert normalized coordinates to pixel coordinates
-        regions = []
-        for line in changed_lines:
-            bbox = line.bounding_box
-            # Convert center-based bbox to corner-based
-            x1 = (bbox.center_x - bbox.width/2) * img_width - margin
-            y1 = (bbox.center_y - bbox.height/2) * img_height - margin
-            x2 = (bbox.center_x + bbox.width/2) * img_width + margin
-            y2 = (bbox.center_y + bbox.height/2) * img_height + margin
-
-            # Ensure coordinates are within image bounds
-            x1 = max(0, int(x1))
-            y1 = max(0, int(y1))
-            x2 = min(img_width, int(x2))
-            y2 = min(img_height, int(y2))
-
-            if x2 > x1 and y2 > y1: #Only add valid regions
-                regions.append((x1, y1, x2, y2))
-
-        if not regions:
-            return None
-
-        # Calculate the bounding box that contains all regions
-        overall_x1 = min(x1 for x1, y1, x2, y2 in regions)
-        overall_y1 = min(y1 for x1, y1, x2, y2 in regions)
-        overall_x2 = max(x2 for x1, y1, x2, y2 in regions)
-        overall_y2 = max(y2 for x1, y1, x2, y2 in regions)
-
-        # Crop the single rectangle containing all changed regions
-        result_image = pil_image.crop((overall_x1, overall_y1, overall_x2, overall_y2))
-
-        return result_image
+    def _extract_lines_from_result(self, result_data):
+        lines = []
+        for p in result_data.paragraphs:
+            for l in p.lines:
+                lines.append(self.filtering._get_line_text(l))
+        return lines

    def __call__(self, img_or_path, filter_text, notify):
        if auto_pause_handler and not filter_text:
@@ -943,6 +974,7 @@ class OutputResult:
        output_format = config.get_general('output_format')
        engine_color = config.get_general('engine_color')
        engine_instance = engine_instances[engine_index]
+        two_pass_processing_active = False

        if filter_text and engine_index_2 != -1 and engine_index_2 != engine_index:
            engine_instance_2 = engine_instances[engine_index_2]
@@ -953,15 +985,14 @@ class OutputResult:
            if not res2:
                logger.opt(ansi=True).warning(f'<{engine_color}>{engine_instance_2.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {result_data_2}')
            else:
-                changed_lines = self._find_changed_lines(result_data_2, self.previous_result)
+                two_pass_processing_active = True
+                changed_lines = self.filtering._find_changed_lines(result_data_2)

-                self.previous_result = copy.deepcopy(result_data_2)
-
-                if len(changed_lines) > 0:
+                if changed_lines:
                    logger.opt(ansi=True).info(f"<{engine_color}>{engine_instance_2.readable_name}</{engine_color}> found {len(changed_lines)} changed line(s) in {end_time - start_time:0.03f}s, re-OCRing with <{engine_color}>{engine_instance.readable_name}</{engine_color}>")

                    if output_format != 'json':
-                        changed_regions_image = self._create_changed_regions_image(img_or_path, changed_lines)
+                        changed_regions_image = self.filtering._create_changed_regions_image(img_or_path, changed_lines)

                        if changed_regions_image:
                            img_or_path = changed_regions_image
@@ -984,7 +1015,7 @@ class OutputResult:
        result_data_text = None

        if isinstance(result_data, OcrResult):
-            unprocessed_text = self._coordinate_format_to_string(result_data)
+            unprocessed_text = self._extract_lines_from_result(result_data)

            if output_format == 'json':
                result_dict = asdict(result_data)
@@ -995,15 +1026,17 @@ class OutputResult:
        else:
            result_data_text = result_data

-        if result_data_text:
-            if output_format == 'json':
-                logger.warning(f"Engine '{engine_instance.name}' does not support JSON output. Falling back to text.")
+        if result_data_text != None:
            if filter_text:
-                text_to_process = self._find_changed_lines_text(result_data_text)
+                text_to_process = self.filtering._find_changed_lines_text(result_data_text, two_pass_processing_active)
+                if text_to_process == []:
+                    return
                output_string = self._post_process(text_to_process, True)
            else:
                output_string = self._post_process(result_data_text, False)
            log_message = output_string
+            if output_format == 'json':
+                logger.warning(f"Engine '{engine_instance.name}' does not support JSON output. Falling back to text.")

        if verbosity != 0:
            if verbosity < -1:
@@ -1120,6 +1153,7 @@ def on_window_closed(alive):


 def on_screenshot_combo():
+    screenshot_thread.is_combo_screenshot = True  # raise the combo flag before signalling screenshot_event; presumably read by the thread on wake-up
    screenshot_event.set()


@@ -1195,13 +1229,13 @@ def run():
    directory_watcher_thread = None
    unix_socket_server = None
    key_combo_listener = None
-    init_filtering = False
    auto_pause_handler = None
    engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
    engine_index_2 = engine_keys.index(engine_secondary) if engine_secondary != '' else -1
    engine_color = config.get_general('engine_color')
    combo_pause = config.get_general('combo_pause')
    combo_engine_switch = config.get_general('combo_engine_switch')
+    screen_capture_periodic = False
    screen_capture_on_combo = False
    notifier = DesktopNotifierSync()
    image_queue = queue.Queue()
@@ -1226,13 +1260,13 @@ def run():
        if screen_capture_combo != '':
            screen_capture_on_combo = True
            key_combos[screen_capture_combo] = on_screenshot_combo
-        else:
+        if screen_capture_delay_secs != -1:
            global periodic_screenshot_queue
            periodic_screenshot_queue = queue.Queue()
+            screen_capture_periodic = True
        screenshot_event = threading.Event()
-        screenshot_thread = ScreenshotThread(screen_capture_on_combo)
+        screenshot_thread = ScreenshotThread()
        screenshot_thread.start()
-        init_filtering = True
        read_from_readable.append('screen capture')
    if 'websocket' in (read_from, read_from_secondary):
        read_from_readable.append('websocket')
@@ -1261,7 +1295,7 @@ def run():
        directory_watcher_thread.start()
        read_from_readable.append(f'directory {read_from_path}')

-    output_result = OutputResult(init_filtering)
+    output_result = OutputResult()

    if len(key_combos) > 0:
        key_combo_listener = keyboard.GlobalHotKeys(key_combos)
@@ -1275,9 +1309,8 @@ def run():
        write_to_readable = f'file {write_to}'

    process_queue = (any(i in ('clipboard', 'websocket', 'unixsocket') for i in (read_from, read_from_secondary)) or read_from_path or screen_capture_on_combo)
-    process_screenshots = 'screencapture' in (read_from, read_from_secondary) and not screen_capture_on_combo
    signal.signal(signal.SIGINT, signal_handler)
-    if (not process_screenshots) and auto_pause != 0:
+    if (not screen_capture_periodic) and auto_pause != 0:
        auto_pause_handler = AutopauseTimer(auto_pause)
    user_input_thread = threading.Thread(target=user_input_thread_run, daemon=True)
    user_input_thread.start()
@@ -1299,17 +1332,22 @@ def run():
        if process_queue:
            try:
                img, filter_text = image_queue.get(timeout=0.1)
+                if screen_capture_periodic:
+                    filter_text = False
                notify = True
            except queue.Empty:
                pass

-        if (not img) and process_screenshots:
+        if (not img) and screen_capture_periodic:
            if (not paused) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
                screenshot_event.set()
-                img = periodic_screenshot_queue.get()
-                filter_text = True
-                notify = False
-                last_screenshot_time = time.time()
+                try:
+                    img = periodic_screenshot_queue.get(timeout=0.1)
+                    filter_text = True
+                    notify = False
+                    last_screenshot_time = time.time()
+                except queue.Empty:
+                    pass

        if img == 0:
            on_window_closed(False)