Add toggle for the text reordering code
This commit is contained in:
@@ -64,8 +64,10 @@ parser.add_argument('-j', '--join_lines', type=str2bool, nargs='?', const=True,
|
||||
help='Display lines in the text output without a space between them.')
|
||||
parser.add_argument('-jp', '--join_paragraphs', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
|
||||
help='Display paragraphs in the text output without a space between them.')
|
||||
parser.add_argument('-rt', '--reorder_text', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
|
||||
help='Regroup and reorder text instead of using paragraphs/order provided by the OCR engine.')
|
||||
parser.add_argument('-f', '--furigana_filter', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
|
||||
help='Try to filter furigana lines for Japanese.')
|
||||
help='Try to filter furigana lines for Japanese. Depends on reorder_text.')
|
||||
parser.add_argument('-of', '--output_format', type=str, default=argparse.SUPPRESS,
|
||||
help='The output format for OCR results. Can be "text" (default) or "json" (to include coordinates).')
|
||||
parser.add_argument('-wp', '--websocket_port', type=int, default=argparse.SUPPRESS,
|
||||
@@ -107,6 +109,7 @@ class Config:
|
||||
'screen_capture_line_recovery': True,
|
||||
'screen_capture_regex_filter': '',
|
||||
'join_lines': False,
|
||||
'reorder_text': True,
|
||||
'furigana_filter': True,
|
||||
'screen_capture_combo': '',
|
||||
'coordinate_selector_combo': '',
|
||||
|
||||
16
owocr/run.py
16
owocr/run.py
@@ -807,8 +807,8 @@ class TextFiltering:
|
||||
return horizontal_overlap > 0.7 and vertical_distance < line_height * 2
|
||||
|
||||
def _merge_overlapping_lines(self, lines, is_vertical):
|
||||
if not lines:
|
||||
return []
|
||||
if len(lines) < 2:
|
||||
return lines
|
||||
|
||||
merged = []
|
||||
used_indices = set()
|
||||
@@ -1017,6 +1017,10 @@ class TextFiltering:
|
||||
paragraphs = row['paragraphs']
|
||||
is_vertical = row['is_vertical']
|
||||
|
||||
if len(paragraphs) < 2:
|
||||
reordered_rows.append(row)
|
||||
continue
|
||||
|
||||
# Sort paragraphs by x-coordinate (left edge)
|
||||
paragraphs_sorted = sorted(paragraphs, key=lambda p: p.bounding_box.left)
|
||||
|
||||
@@ -1035,7 +1039,7 @@ class TextFiltering:
|
||||
return reordered_rows
|
||||
|
||||
def _reorder_mixed_orientation_blocks(self, paragraphs, row_is_vertical):
|
||||
if not paragraphs:
|
||||
if len(paragraphs) < 2:
|
||||
return paragraphs
|
||||
|
||||
result = []
|
||||
@@ -1066,7 +1070,6 @@ class TextFiltering:
|
||||
return result
|
||||
|
||||
def _flatten_rows_to_paragraphs(self, rows):
|
||||
# Sort rows by vertical position (top to bottom)
|
||||
rows_sorted = sorted(rows, key=lambda r: min(p.bounding_box.top for p in r['paragraphs']))
|
||||
|
||||
if self.debug_filtering:
|
||||
@@ -1075,7 +1078,6 @@ class TextFiltering:
|
||||
for p in r['paragraphs']:
|
||||
logger.opt(colors=True).debug("<green> Paragraph: '{}' vertical: '{}'</>", [self.get_line_text(line) for line in p.lines], p.writing_direction == 'TOP_TO_BOTTOM')
|
||||
|
||||
# Flatten all paragraphs
|
||||
all_paragraphs = []
|
||||
for row in rows_sorted:
|
||||
all_paragraphs.extend(row['paragraphs'])
|
||||
@@ -1793,6 +1795,7 @@ class OutputResult:
|
||||
self.engine_color = config.get_general('engine_color')
|
||||
self.verbosity = config.get_general('verbosity')
|
||||
self.notifications = config.get_general('notifications')
|
||||
self.reorder_text = config.get_general('reorder_text')
|
||||
self.line_separator = '' if config.get_general('join_lines') else ' '
|
||||
self.paragraph_separator = '' if config.get_general('join_paragraphs') else ' '
|
||||
self.write_to = config.get_general('write_to')
|
||||
@@ -1880,7 +1883,8 @@ class OutputResult:
|
||||
return
|
||||
|
||||
if isinstance(result_data, OcrResult):
|
||||
result_data = self.filtering.order_paragraphs_and_lines(result_data)
|
||||
if self.reorder_text:
|
||||
result_data = self.filtering.order_paragraphs_and_lines(result_data)
|
||||
result_data_text = self._extract_lines_from_result(result_data)
|
||||
else:
|
||||
result_data_text = result_data
|
||||
|
||||
@@ -101,7 +101,11 @@
|
||||
;Display paragraphs in the text output without a space between them.
|
||||
;join_paragraphs = False
|
||||
|
||||
;Try to filter furigana lines for Japanese.
|
||||
;Regroup and reorder text instead of using paragraphs/order provided by the OCR
|
||||
;engine.
|
||||
;reorder_text = True
|
||||
|
||||
;Try to filter furigana lines for Japanese. Depends on reorder_text.
|
||||
;furigana_filter = True
|
||||
|
||||
;When reading with screen capture, combo to wait on for taking a screenshot.
|
||||
|
||||
Reference in New Issue
Block a user