Add optional line text field to coordinate format (helps with text output format in some languages/corner cases)

This commit is contained in:
AuroraWright
2025-10-06 02:07:44 +02:00
parent 9540d150a2
commit 0436ef9593
2 changed files with 13 additions and 6 deletions

View File

@@ -110,6 +110,7 @@ class Line:
"""Represents a single line of text, composed of words.""" """Represents a single line of text, composed of words."""
bounding_box: BoundingBox bounding_box: BoundingBox
words: List[Word] = field(default_factory=list) words: List[Word] = field(default_factory=list)
text: Optional[str] = None
@dataclass @dataclass
class Paragraph: class Paragraph:
@@ -567,6 +568,7 @@ class Bing:
words.append(word) words.append(word)
line = Line( line = Line(
text=l.get('text', ''),
bounding_box=self._quad_to_center_bbox(l['boundingBox']), bounding_box=self._quad_to_center_bbox(l['boundingBox']),
words=words words=words
) )
@@ -840,6 +842,7 @@ class AppleLiveText:
l_bbox = l.quad().boundingBox() l_bbox = l.quad().boundingBox()
line = Line( line = Line(
text=l.string(),
bounding_box=BoundingBox( bounding_box=BoundingBox(
width=l_bbox.size.width, width=l_bbox.size.width,
height=l_bbox.size.height, height=l_bbox.size.height,
@@ -996,6 +999,7 @@ class OneOCR:
words.append(word) words.append(word)
line = Line( line = Line(
text=l.get('text', ''),
bounding_box=self._pixel_quad_to_center_bbox(l['bounding_rect'], img_width, img_height), bounding_box=self._pixel_quad_to_center_bbox(l['bounding_rect'], img_width, img_height),
words=words words=words
) )

View File

@@ -729,12 +729,15 @@ class OutputResult:
full_text_parts = [] full_text_parts = []
for p in result_data.paragraphs: for p in result_data.paragraphs:
for l in p.lines: for l in p.lines:
for w in l.words: if l.text != None:
full_text_parts.append(w.text) full_text_parts.append(l.text)
if w.separator != None: else:
full_text_parts.append(w.separator) for w in l.words:
else: full_text_parts.append(w.text)
full_text_parts.append(' ') if w.separator != None:
full_text_parts.append(w.separator)
else:
full_text_parts.append(' ')
full_text_parts.append('\n') full_text_parts.append('\n')
return "".join(full_text_parts) return "".join(full_text_parts)