diff --git a/owocr/ocr.py b/owocr/ocr.py
index 7928b4e..bef28f8 100644
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -530,7 +530,7 @@ class MangaOcrSegmented:
lines.append(line)
p_bbox = self._convert_box_bbox(list(blk.xyxy), img_width, img_height)
- writing_direction = 'TOP_TO_BOTTOM' if blk.vertical else None
+ writing_direction = 'TOP_TO_BOTTOM' if blk.vertical else "LEFT_TO_RIGHT"
paragraph = Paragraph(bounding_box=p_bbox, lines=lines, writing_direction=writing_direction)
paragraphs.append(paragraph)
diff --git a/owocr/run.py b/owocr/run.py
index ca971e3..19b5006 100644
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -622,7 +622,7 @@ class TextFiltering:
if current_lines_ocr:
if i2 >= 0:
- is_furigana = self._furigana_filter(current_lines[len_recovered_lines:], current_lines_ocr, current_text, i2)
+ is_furigana = self._furigana_filter(current_lines[len_recovered_lines:], current_lines_ocr, i2)
if is_furigana:
continue
@@ -643,71 +643,75 @@ class TextFiltering:
return changed_lines, changed_lines_count
- def _furigana_filter(self, current_lines, current_lines_ocr, current_text, i):
- has_kanji = self.kanji_regex.search(current_text)
+ def _furigana_filter(self, current_lines, current_lines_ocr, i):
+ """Decide whether line ``i`` looks like furigana belonging to a later line.
+
+ The line is never furigana when its own text matches ``self.kanji_regex``.
+ Otherwise only lines after ``i`` are examined; empty ones are skipped.
+ A candidate must first pass a position check (centre distance within
+ bounds and an overlap above 0.4 on the other axis), must contain kanji,
+ and line ``i`` must be smaller than it (width for vertical text, height
+ for horizontal text).
+
+ Args:
+ current_lines: indexable sequence of line texts.
+ current_lines_ocr: indexable sequence, indexed like ``current_lines``,
+ whose items expose ``bounding_box`` with ``width``, ``height``,
+ ``center_x`` and ``center_y``.
+ i: index of the line under test.
+
+ Returns:
+ True when line ``i`` is judged to be furigana, otherwise False.
+ """
+ current_line_text = current_lines[i]
+ has_kanji = self.kanji_regex.search(current_line_text)
if has_kanji:
return False
is_furigana = False
current_line_bbox = current_lines_ocr[i].bounding_box
- for j in range(len(current_lines_ocr)):
- if i == j:
- continue
+ for j in range(i + 1, len(current_lines_ocr)):
if not current_lines[j]:
continue
other_line_text = current_lines[j]
other_line_bbox = current_lines_ocr[j].bounding_box
- if len(current_text) <= len(other_line_text):
- is_vertical = other_line_bbox.height > other_line_bbox.width
+ # Orientation is taken from the box of whichever line has the longer text (ties use the other line).
+ if len(current_line_text) <= len(other_line_text):
+ aspect_ratio = other_line_bbox.width / other_line_bbox.height
else:
- is_vertical = current_line_bbox.height > current_line_bbox.width
+ aspect_ratio = current_line_bbox.width / current_line_bbox.height
+ # A width/height ratio below 0.8 is treated as vertical text.
+ # NOTE(review): a bounding box with height 0 would raise ZeroDivisionError in the divisions above.
+ is_vertical = aspect_ratio < 0.8
- logger.opt(colors=True).debug(f"Furigana check against line: '{other_line_text}'")
+ logger.opt(colors=True).debug(f"Furigana check against line: '{other_line_text}' vertical: '{is_vertical}'")
if is_vertical:
- width_threshold = other_line_bbox.width * 0.7
- is_smaller = current_line_bbox.width < width_threshold
- logger.opt(colors=True).debug(f"Vertical furigana check width: '{other_line_bbox.width}' '{current_line_bbox.width}'")
- else:
- height_threshold = other_line_bbox.height * 0.85
- is_smaller = current_line_bbox.height < height_threshold
- logger.opt(colors=True).debug(f"Horizontal furigana check height: '{other_line_bbox.height}' '{current_line_bbox.height}'")
+ # Centre distance must lie between half the width difference and the sum of both widths.
+ min_h_distance = abs(other_line_bbox.width - current_line_bbox.width) / 2
+ max_h_distance = other_line_bbox.width + current_line_bbox.width
+ min_v_overlap = 0.4
- if not is_smaller:
- continue
-
- # Check if the line has kanji
- other_has_kanji = self.kanji_regex.search(other_line_text)
- if not other_has_kanji:
- continue
-
- if is_vertical:
- horizontal_threshold = (current_line_bbox.width + other_line_bbox.width) * 0.7
horizontal_distance = current_line_bbox.center_x - other_line_bbox.center_x
vertical_overlap = self._check_vertical_overlap(current_line_bbox, other_line_bbox)
- logger.opt(colors=True).debug(f"Vertical furigana check position: '{horizontal_threshold}' '{horizontal_distance}' '{vertical_overlap}'")
+ logger.opt(colors=True).debug(f"Vertical furigana: min h.dist '{min_h_distance:.4f}' max h.dist '{max_h_distance:.4f}' h.dist '{horizontal_distance:.4f}' v.overlap '{vertical_overlap:.4f}'")
- # If horizontally close and vertically aligned, it's likely furigana
- if (0 < horizontal_distance < horizontal_threshold and vertical_overlap > 0.4):
- is_furigana = True
- logger.opt(colors=True).debug(f"Skipping vertical furigana line: '{current_text}' next to line: '{other_line_text}'")
- break
+ passed_position_check = min_h_distance < horizontal_distance < max_h_distance and vertical_overlap > min_v_overlap
else:
- vertical_threshold = other_line_bbox.height + current_line_bbox.height
+ # Same bounds as the vertical case, applied to heights and the vertical centre distance.
+ min_v_distance = abs(other_line_bbox.height - current_line_bbox.height) / 2
+ max_v_distance = other_line_bbox.height + current_line_bbox.height
+ min_h_overlap = 0.4
+
vertical_distance = other_line_bbox.center_y - current_line_bbox.center_y
horizontal_overlap = self._check_horizontal_overlap(current_line_bbox, other_line_bbox)
- logger.opt(colors=True).debug(f"Horizontal furigana check position: '{vertical_threshold}' '{vertical_distance}' '{horizontal_overlap}'")
+ logger.opt(colors=True).debug(f"Horizontal furigana: min v.dist '{min_v_distance:.4f}' max v.dist '{max_v_distance:.4f}' v.dist '{vertical_distance:.4f}' h.overlap '{horizontal_overlap:.4f}'")
- # If vertically close and horizontally aligned, it's likely furigana
- if (0 < vertical_distance < vertical_threshold and horizontal_overlap > 0.4):
- is_furigana = True
- logger.opt(colors=True).debug(f"Skipping horizontal furigana line: '{current_text}' above line: '{other_line_text}'")
- break
+ passed_position_check = min_v_distance < vertical_distance < max_v_distance and horizontal_overlap > min_h_overlap
+
+ if not passed_position_check:
+ logger.opt(colors=True).debug(f"Not overlapping line found: '{other_line_text}', continuing")
+ continue
+
+ # A line that passes the position check but has no kanji ends the search without flagging furigana.
+ other_has_kanji = self.kanji_regex.search(other_line_text)
+ if not other_has_kanji:
+ break
+
+ if is_vertical:
+ width_threshold = other_line_bbox.width * 0.77
+ is_smaller = current_line_bbox.width < width_threshold
+ logger.opt(colors=True).debug(f"Vertical furigana width: kanji '{other_line_bbox.width:.4f}' kana '{current_line_bbox.width:.4f}' max kana '{width_threshold:.4f}'")
+ else:
+ height_threshold = other_line_bbox.height * 0.85
+ is_smaller = current_line_bbox.height < height_threshold
+ # NOTE(review): the label below says "width" although the values logged are heights.
+ logger.opt(colors=True).debug(f"Horizontal furigana width: kanji '{other_line_bbox.height:.4f}' kana '{current_line_bbox.height:.4f}' max kana '{height_threshold:.4f}'")
+
+ if is_smaller:
+ is_furigana = True
+ logger.opt(colors=True).debug(f"Skipping furigana line: '{current_line_text}' next to line: '{other_line_text}'")
+
+ # NOTE(review): indentation is not visible in this view; this break appears to stop the search after the first line that passed the position check, whether or not it was smaller - confirm.
+ break
return is_furigana
@@ -741,7 +745,7 @@ class TextFiltering:
logger.opt(colors=True).debug(f"Line: '{text}'")
- is_furigana = self._furigana_filter(lines, lines_ocr, text, i)
+ is_furigana = self._furigana_filter(lines, lines_ocr, i)
if is_furigana:
continue
@@ -783,6 +787,126 @@ class TextFiltering:
return current_line
+    def order_paragraphs_and_lines(self, result_data):
+        """Return the OCR result with paragraphs and their lines put in reading order.
+
+        A result without paragraphs is handed back untouched. Otherwise
+        paragraphs that have no lines are left out, the remaining ones are
+        ordered, each paragraph's ``lines`` attribute is reassigned to its
+        ordered lines, and a new ``OcrResult`` carrying the original
+        ``image_properties`` is returned.
+        """
+        if not result_data.paragraphs:
+            return result_data
+
+        candidates = [para for para in result_data.paragraphs if para.lines]
+        reading_order = self._order_paragraphs_by_orientation_and_overlap(candidates)
+
+        for para in reading_order:
+            vertical = self._is_paragraph_vertical(para)
+            para.lines = self._order_lines_by_paragraph_orientation(para.lines, vertical)
+
+        return OcrResult(image_properties=result_data.image_properties, paragraphs=reading_order)
+
+    def _order_lines_by_paragraph_orientation(self, lines, is_paragraph_vertical):
+        """Order a paragraph's lines for reading.
+
+        Zero or one line is returned as given. Otherwise a copy is sorted by
+        vertical centre, then every pair whose boxes overlap vertically is
+        swapped when its horizontal order contradicts the paragraph
+        orientation: right-to-left for vertical paragraphs, left-to-right
+        for horizontal ones.
+        """
+        if len(lines) <= 1:
+            return lines
+
+        # Baseline order: top to bottom by vertical centre.
+        result = sorted(lines, key=lambda entry: entry.bounding_box.center_y)
+        count = len(result)
+
+        for first in range(count):
+            for second in range(first + 1, count):
+                box_a = result[first].bounding_box
+                box_b = result[second].bounding_box
+
+                # Only lines sharing some vertical extent are compared horizontally.
+                if not (self._check_vertical_overlap(box_a, box_b) > 0):
+                    continue
+
+                if is_paragraph_vertical:
+                    # Vertical paragraph: larger center_x comes first.
+                    out_of_order = box_a.center_x < box_b.center_x
+                else:
+                    # Horizontal paragraph: smaller center_x comes first.
+                    out_of_order = box_a.center_x > box_b.center_x
+
+                if out_of_order:
+                    result[first], result[second] = result[second], result[first]
+
+        return result
+
+    def _order_paragraphs_by_orientation_and_overlap(self, paragraphs):
+        """Order paragraphs for reading.
+
+        Zero or one paragraph is returned as given. Otherwise a copy is
+        sorted by vertical centre, then every pair whose boxes overlap
+        vertically is swapped when its horizontal order is wrong: two
+        vertical paragraphs read right-to-left, any other combination reads
+        left-to-right.
+        """
+        if len(paragraphs) <= 1:
+            return paragraphs
+
+        # Baseline order: top to bottom by vertical centre.
+        result = sorted(paragraphs, key=lambda para: para.bounding_box.center_y)
+        count = len(result)
+
+        for first in range(count):
+            for second in range(first + 1, count):
+                para_a = result[first]
+                para_b = result[second]
+
+                # Only paragraphs sharing some vertical extent are compared horizontally.
+                if not (self._check_vertical_overlap(para_a.bounding_box, para_b.bounding_box) > 0):
+                    continue
+
+                a_vertical = self._is_paragraph_vertical(para_a)
+                b_vertical = self._is_paragraph_vertical(para_b)
+                a_x = para_a.bounding_box.center_x
+                b_x = para_b.bounding_box.center_x
+
+                if a_vertical and b_vertical:
+                    # Both vertical: larger center_x comes first.
+                    out_of_order = a_x < b_x
+                else:
+                    # Mixed orientations or both horizontal: smaller center_x comes first.
+                    out_of_order = a_x > b_x
+
+                if out_of_order:
+                    result[first], result[second] = result[second], result[first]
+
+        return result
+
+    def _is_paragraph_vertical(self, paragraph):
+        """Return True when the paragraph should be treated as vertical text.
+
+        An explicit ``writing_direction`` decides the answer: only
+        ``"TOP_TO_BOTTOM"`` counts as vertical. Without one, the orientation
+        is estimated from the mean width/height ratio of the paragraph's
+        line boxes; a mean below 0.8 means vertical.
+
+        Lines whose box has zero height are skipped so they cannot cause a
+        division by zero, and a paragraph with no measurable line is
+        reported as horizontal (False).
+        """
+        if paragraph.writing_direction:
+            return paragraph.writing_direction == "TOP_TO_BOTTOM"
+
+        ratio_sum = 0.0
+        measured = 0
+
+        for line in paragraph.lines:
+            box = line.bounding_box
+            if not box.height:
+                # Degenerate box: no aspect ratio can be computed for it.
+                continue
+            ratio_sum += box.width / box.height
+            measured += 1
+
+        if not measured:
+            return False
+
+        return ratio_sum / measured < 0.8  # Threshold for vertical text
+
def _check_horizontal_overlap(self, bbox1, bbox2):
# Calculate left and right boundaries for both boxes
left1 = bbox1.center_x - bbox1.width / 2
@@ -1505,6 +1629,7 @@ class OutputResult:
return
if isinstance(result_data, OcrResult):
+ result_data = self.filtering.order_paragraphs_and_lines(result_data)
result_data_text = self._extract_lines_from_result(result_data)
else:
result_data_text = result_data