Avoid reordering horizontal text if it overlaps horizontally
This commit is contained in:
28
owocr/run.py
28
owocr/run.py
@@ -792,10 +792,10 @@ class TextFiltering:
|
|||||||
return result_data
|
return result_data
|
||||||
|
|
||||||
paragraphs_with_lines = [p for p in result_data.paragraphs if p.lines]
|
paragraphs_with_lines = [p for p in result_data.paragraphs if p.lines]
|
||||||
ordered_paragraphs = self._order_paragraphs_by_orientation_and_overlap(paragraphs_with_lines)
|
ordered_paragraphs = self._order_paragraphs(paragraphs_with_lines)
|
||||||
|
|
||||||
for paragraph in ordered_paragraphs:
|
for paragraph in ordered_paragraphs:
|
||||||
paragraph.lines = self._order_lines_by_paragraph_orientation(
|
paragraph.lines = self._order_lines(
|
||||||
paragraph.lines,
|
paragraph.lines,
|
||||||
self._is_paragraph_vertical(paragraph)
|
self._is_paragraph_vertical(paragraph)
|
||||||
)
|
)
|
||||||
@@ -805,7 +805,7 @@ class TextFiltering:
|
|||||||
paragraphs=ordered_paragraphs
|
paragraphs=ordered_paragraphs
|
||||||
)
|
)
|
||||||
|
|
||||||
def _order_lines_by_paragraph_orientation(self, lines, is_paragraph_vertical):
|
def _order_lines(self, lines, is_paragraph_vertical):
|
||||||
if len(lines) <= 1:
|
if len(lines) <= 1:
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
@@ -833,8 +833,14 @@ class TextFiltering:
|
|||||||
if line_i.bounding_box.center_x < line_j.bounding_box.center_x:
|
if line_i.bounding_box.center_x < line_j.bounding_box.center_x:
|
||||||
should_swap = True
|
should_swap = True
|
||||||
else:
|
else:
|
||||||
# For horizontal paragraphs: order left to right (center_x ascending)
|
# For horizontal paragraphs: check horizontal overlap first
|
||||||
if line_i.bounding_box.center_x > line_j.bounding_box.center_x:
|
horizontal_overlap = self._check_horizontal_overlap(
|
||||||
|
line_i.bounding_box,
|
||||||
|
line_j.bounding_box
|
||||||
|
)
|
||||||
|
|
||||||
|
# Only swap if there's NO horizontal overlap
|
||||||
|
if horizontal_overlap == 0 and line_i.bounding_box.center_x > line_j.bounding_box.center_x:
|
||||||
should_swap = True
|
should_swap = True
|
||||||
|
|
||||||
if should_swap:
|
if should_swap:
|
||||||
@@ -842,7 +848,7 @@ class TextFiltering:
|
|||||||
|
|
||||||
return ordered_lines
|
return ordered_lines
|
||||||
|
|
||||||
def _order_paragraphs_by_orientation_and_overlap(self, paragraphs):
|
def _order_paragraphs(self, paragraphs):
|
||||||
if len(paragraphs) <= 1:
|
if len(paragraphs) <= 1:
|
||||||
return paragraphs
|
return paragraphs
|
||||||
|
|
||||||
@@ -881,8 +887,14 @@ class TextFiltering:
|
|||||||
if para_i.bounding_box.center_x > para_j.bounding_box.center_x:
|
if para_i.bounding_box.center_x > para_j.bounding_box.center_x:
|
||||||
should_swap = True
|
should_swap = True
|
||||||
else:
|
else:
|
||||||
# Both horizontal: order left to right (center_x ascending)
|
# Both horizontal: check horizontal overlap first
|
||||||
if para_i.bounding_box.center_x > para_j.bounding_box.center_x:
|
horizontal_overlap = self._check_horizontal_overlap(
|
||||||
|
para_i.bounding_box,
|
||||||
|
para_j.bounding_box
|
||||||
|
)
|
||||||
|
|
||||||
|
# Only swap if there's NO horizontal overlap
|
||||||
|
if horizontal_overlap == 0 and para_i.bounding_box.center_x > para_j.bounding_box.center_x:
|
||||||
should_swap = True
|
should_swap = True
|
||||||
|
|
||||||
if should_swap:
|
if should_swap:
|
||||||
|
|||||||
Reference in New Issue
Block a user