From 6f70d05bf2109af542c99c32644eb891bb008239 Mon Sep 17 00:00:00 2001 From: AuroraWright Date: Sat, 18 Oct 2025 17:15:08 +0200 Subject: [PATCH] Add join_lines option --- owocr/config.py | 3 +++ owocr/run.py | 10 ++++++---- owocr_config.ini | 3 +++ pyproject.toml | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/owocr/config.py b/owocr/config.py index 97342b5..ce12fbe 100644 --- a/owocr/config.py +++ b/owocr/config.py @@ -58,6 +58,8 @@ parser.add_argument('-scc', '--coordinate_selector_combo', type=str, default=arg help='When reading with screen capture, combo to wait on for invoking the coordinate picker to change the screen/window area. Example value: "++c". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS, help='Two letter language code for filtering screencapture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).') +parser.add_argument('-j', '--join_lines', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, + help="Display lines in the text output without a space between them.") parser.add_argument('-f', '--furigana_filter', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, help="Try to filter furigana lines for Japanese.") parser.add_argument('-of', '--output_format', type=str, default=argparse.SUPPRESS, @@ -99,6 +101,7 @@ class Config: 'screen_capture_only_active_windows': True, 'screen_capture_frame_stabilization': -1, 'screen_capture_line_recovery': True, + 'join_lines': False, 'furigana_filter': True, 'screen_capture_combo': '', 'coordinate_selector_combo': '', diff --git a/owocr/run.py b/owocr/run.py index b617c5b..ae8bd41 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -1332,8 +1332,9 @@ class SecondPassThread: def _process_ocr(self): while self.running: try: - img, engine_instance, recovered_lines_count = self.input_queue.get(timeout=0.5) + img, engine_index_local, recovered_lines_count = self.input_queue.get(timeout=0.5) + engine_instance = engine_instances[engine_index_local] start_time = time.time() res, result_data = engine_instance(img) end_time = time.time() @@ -1359,13 +1360,14 @@ class OutputResult: self.engine_color = config.get_general('engine_color') self.verbosity = config.get_general('verbosity') self.notifications = config.get_general('notifications') + self.line_separator = '' if config.get_general('join_lines') else ' ' self.write_to = config.get_general('write_to') self.filtering = TextFiltering() self.second_pass_thread = SecondPassThread() def _post_process(self, text, strip_spaces): is_cj_text = self.filtering.cj_regex.search(''.join(text)) - line_separator = '' if strip_spaces else ' ' + line_separator = '' if strip_spaces else self.line_separator if is_cj_text: text = line_separator.join([''.join(i.split()) for i in text]) else: @@ -1385,7 +1387,7 @@ class OutputResult: def __call__(self, img_or_path, filter_text, auto_pause, notify): engine_index_local = engine_index - engine_instance = engine_instances[engine_index] + engine_instance = engine_instances[engine_index_local] two_pass_processing_active = False result_data = None @@ -1410,7 +1412,7 @@ class OutputResult: img_or_path = changed_regions_image self.second_pass_thread.start() - self.second_pass_thread.submit_task(img_or_path, engine_instance, recovered_lines_count) + self.second_pass_thread.submit_task(img_or_path, engine_index_local, recovered_lines_count) second_pass_result = self.second_pass_thread.get_result() if second_pass_result: diff --git a/owocr_config.ini b/owocr_config.ini index cd77583..418c6b2 100644 --- a/owocr_config.ini +++ b/owocr_config.ini @@ -87,6 +87,9 @@ ;recover missed lines from unstable frames. Can lead to increased glitches. ;screen_capture_line_recovery = True +;Display lines in the text output without a space between them. +;join_lines = False + ;Try to filter furigana lines for Japanese. ;furigana_filter = True diff --git a/pyproject.toml b/pyproject.toml index 3f29619..97b8a13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "owocr" -version = "1.18.4" +version = "1.18.5" description = "Japanese OCR" readme = "README.md" requires-python = ">=3.11"