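Add an optional `text` field to `Line` in the coordinate format; when an engine provides it, text output uses the line's own string instead of rejoining its words (helps with the text output format in some languages/corner cases)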

2025-10-06 02:07:44 +02:00
parent 9540d150a2
commit 0436ef9593
2 changed files with 13 additions and 6 deletions
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -110,6 +110,7 @@ class Line:
    """Represents a single line of text, composed of words."""
    bounding_box: BoundingBox
    words: List[Word] = field(default_factory=list)
+    text: Optional[str] = None

@dataclass
 class Paragraph:
@@ -567,6 +568,7 @@ class Bing:
                            words.append(word)

                        line = Line(
+                            text=l.get('text', ''),
                            bounding_box=self._quad_to_center_bbox(l['boundingBox']),
                            words=words
                        )
@@ -840,6 +842,7 @@ class AppleLiveText:
                
                l_bbox = l.quad().boundingBox()
                line = Line(
+                    text=l.string(),
                    bounding_box=BoundingBox(
                        width=l_bbox.size.width,
                        height=l_bbox.size.height,
@@ -996,6 +999,7 @@ class OneOCR:
                words.append(word)
            
            line = Line(
+                text=l.get('text', ''),
                bounding_box=self._pixel_quad_to_center_bbox(l['bounding_rect'], img_width, img_height),
                words=words
            )
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -729,6 +729,9 @@ class OutputResult:
        full_text_parts = []
        for p in result_data.paragraphs:
            for l in p.lines:
+                if l.text != None:
+                    full_text_parts.append(l.text)
+                else:
                    for w in l.words:
                        full_text_parts.append(w.text)
                        if w.separator != None: