Add an optional line-level text field to the coordinate format; when set, it is used for text output instead of rejoining the line's words (helps with some languages/corner cases)

2025-10-06 02:07:44 +02:00
parent 9540d150a2
commit 0436ef9593
2 changed files with 13 additions and 6 deletions
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -110,6 +110,7 @@ class Line:
    """Represents a single line of text, composed of words."""
    bounding_box: BoundingBox
    words: List[Word] = field(default_factory=list)
    text: Optional[str] = None
@dataclass
 class Paragraph:
@@ -567,6 +568,7 @@ class Bing:
                            words.append(word)
                        line = Line(
                            text=l.get('text', ''),
                            bounding_box=self._quad_to_center_bbox(l['boundingBox']),
                            words=words
                        )
@@ -840,6 +842,7 @@ class AppleLiveText:
                l_bbox = l.quad().boundingBox()
                line = Line(
                    text=l.string(),
                    bounding_box=BoundingBox(
                        width=l_bbox.size.width,
                        height=l_bbox.size.height,
@@ -996,6 +999,7 @@ class OneOCR:
                words.append(word)
            line = Line(
                text=l.get('text', ''),
                bounding_box=self._pixel_quad_to_center_bbox(l['bounding_rect'], img_width, img_height),
                words=words
            )
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -729,12 +729,15 @@ class OutputResult:
        full_text_parts = []
        for p in result_data.paragraphs:
            for l in p.lines:
-                for w in l.words:
+                if l.text != None:
-                    full_text_parts.append(w.text)
+                    full_text_parts.append(l.text)
-                    if w.separator != None:
+                else:
-                        full_text_parts.append(w.separator)
+                    for w in l.words:
-                    else:
+                        full_text_parts.append(w.text)
-                        full_text_parts.append(' ')
+                        if w.separator != None:
                            full_text_parts.append(w.separator)
                        else:
                            full_text_parts.append(' ')
                full_text_parts.append('\n')
        return "".join(full_text_parts)