From 0436ef95937185c8221acea84f00d2f3913d074e Mon Sep 17 00:00:00 2001 From: AuroraWright Date: Mon, 6 Oct 2025 02:07:44 +0200 Subject: [PATCH] Add optional line text field to coordinate format (helps with text output format in some languages/corner cases) --- owocr/ocr.py | 4 ++++ owocr/run.py | 15 +++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/owocr/ocr.py b/owocr/ocr.py index e744c4b..43a1235 100644 --- a/owocr/ocr.py +++ b/owocr/ocr.py @@ -110,6 +110,7 @@ class Line: """Represents a single line of text, composed of words.""" bounding_box: BoundingBox words: List[Word] = field(default_factory=list) + text: Optional[str] = None @dataclass class Paragraph: @@ -567,6 +568,7 @@ class Bing: words.append(word) line = Line( + text=l.get('text', ''), bounding_box=self._quad_to_center_bbox(l['boundingBox']), words=words ) @@ -840,6 +842,7 @@ class AppleLiveText: l_bbox = l.quad().boundingBox() line = Line( + text=l.string(), bounding_box=BoundingBox( width=l_bbox.size.width, height=l_bbox.size.height, @@ -996,6 +999,7 @@ class OneOCR: words.append(word) line = Line( + text=l.get('text', ''), bounding_box=self._pixel_quad_to_center_bbox(l['bounding_rect'], img_width, img_height), words=words ) diff --git a/owocr/run.py b/owocr/run.py index d4c2d0c..ca11c47 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -729,12 +729,15 @@ class OutputResult: full_text_parts = [] for p in result_data.paragraphs: for l in p.lines: - for w in l.words: - full_text_parts.append(w.text) - if w.separator != None: - full_text_parts.append(w.separator) - else: - full_text_parts.append(' ') + if l.text != None: + full_text_parts.append(l.text) + else: + for w in l.words: + full_text_parts.append(w.text) + if w.separator != None: + full_text_parts.append(w.separator) + else: + full_text_parts.append(' ') full_text_parts.append('\n') return "".join(full_text_parts)