Add join_lines option; pass engine index (not instance) to second-pass thread

2025-10-18 17:15:08 +02:00
parent 4150d8ba45
commit 6f70d05bf2
4 changed files with 13 additions and 5 deletions
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -58,6 +58,8 @@ parser.add_argument('-scc', '--coordinate_selector_combo', type=str, default=arg
                    help='When reading with screen capture, combo to wait on for invoking the coordinate picker to change the screen/window area. Example value: "<ctrl>+<shift>+c". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
 parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS,
                    help='Two letter language code for filtering screencapture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).')
+parser.add_argument('-j', '--join_lines', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
+                    help="Display lines in the text output without a space between them.")
 parser.add_argument('-f', '--furigana_filter', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
                    help="Try to filter furigana lines for Japanese.")
 parser.add_argument('-of', '--output_format', type=str, default=argparse.SUPPRESS,
@@ -99,6 +101,7 @@ class Config:
        'screen_capture_only_active_windows': True,
        'screen_capture_frame_stabilization': -1,
        'screen_capture_line_recovery': True,
+        'join_lines': False,
        'furigana_filter': True,
        'screen_capture_combo': '',
        'coordinate_selector_combo': '',
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -1332,8 +1332,9 @@ class SecondPassThread:
    def _process_ocr(self):
        while self.running:
            try:
-                img, engine_instance, recovered_lines_count = self.input_queue.get(timeout=0.5)
+                img, engine_index_local, recovered_lines_count = self.input_queue.get(timeout=0.5)

+                engine_instance = engine_instances[engine_index_local]
                start_time = time.time()
                res, result_data = engine_instance(img)
                end_time = time.time()
@@ -1359,13 +1360,14 @@ class OutputResult:
        self.engine_color = config.get_general('engine_color')
        self.verbosity = config.get_general('verbosity')
        self.notifications = config.get_general('notifications')
+        self.line_separator = '' if config.get_general('join_lines') else ' '
        self.write_to = config.get_general('write_to')
        self.filtering = TextFiltering()
        self.second_pass_thread = SecondPassThread()

    def _post_process(self, text, strip_spaces):
        is_cj_text = self.filtering.cj_regex.search(''.join(text))
-        line_separator = '' if strip_spaces else ' '
+        line_separator = '' if strip_spaces else self.line_separator
        if is_cj_text:
            text = line_separator.join([''.join(i.split()) for i in text])
        else:
@@ -1385,7 +1387,7 @@ class OutputResult:

    def __call__(self, img_or_path, filter_text, auto_pause, notify):
        engine_index_local = engine_index
-        engine_instance = engine_instances[engine_index]
+        engine_instance = engine_instances[engine_index_local]
        two_pass_processing_active = False
        result_data = None

@@ -1410,7 +1412,7 @@ class OutputResult:
                            img_or_path = changed_regions_image

                        self.second_pass_thread.start()
-                        self.second_pass_thread.submit_task(img_or_path, engine_instance, recovered_lines_count)
+                        self.second_pass_thread.submit_task(img_or_path, engine_index_local, recovered_lines_count)

                second_pass_result = self.second_pass_thread.get_result()
                if second_pass_result:
--- a/owocr_config.ini
+++ b/owocr_config.ini
@@ -87,6 +87,9 @@
 ;recover missed lines from unstable frames. Can lead to increased glitches.
 ;screen_capture_line_recovery = True

+;Display lines in the text output without a space between them.
+;join_lines = False
+
 ;Try to filter furigana lines for Japanese.
 ;furigana_filter = True

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "owocr"
-version = "1.18.4"
+version = "1.18.5"
 description = "Japanese OCR"
 readme = "README.md"
 requires-python = ">=3.11"