Remove more duplication/fix rotation for Microsoft stuff
This commit is contained in:
183
owocr/ocr.py
183
owocr/ocr.py
@@ -191,12 +191,78 @@ def limit_image_size(img, max_size):
|
|||||||
|
|
||||||
return False, '', (None, None)
|
return False, '', (None, None)
|
||||||
|
|
||||||
|
def quad_to_bounding_box(x1, y1, x2, y2, x3, y3, x4, y4, img_width=None, img_height=None):
    """Turn a four-corner quadrilateral into a centre-based ``BoundingBox``.

    The corners are consumed in the order 1 -> 2 -> 3 -> 4. Edges 1-2 and 4-3
    are treated as the two "width" sides, edges 1-4 and 2-3 as the two
    "height" sides; each pair is averaged using Euclidean edge lengths, so
    the result stays meaningful for rotated quads.

    Args:
        x1, y1, x2, y2, x3, y3, x4, y4: Corner coordinates of the quad.
        img_width, img_height: Optional image dimensions. When both are
            truthy, centre and size are divided by them (x-related values by
            ``img_width``, y-related values by ``img_height``).

    Returns:
        A ``BoundingBox`` carrying ``center_x``, ``center_y``, ``width``,
        ``height`` and ``rotation_z`` (the ``atan2`` angle, in radians, of
        the edge from corner 1 to corner 2).
    """
    def edge_length(ax, ay, bx, by):
        # Euclidean distance between two corners.
        return sqrt((bx - ax)**2 + (by - ay)**2)

    # Centroid of the four corners.
    mid_x = sum((x1, x2, x3, x4)) / 4
    mid_y = sum((y1, y2, y3, y4)) / 4

    # Average the two opposite edges for each dimension.
    box_width = (edge_length(x1, y1, x2, y2) + edge_length(x4, y4, x3, y3)) / 2
    box_height = (edge_length(x1, y1, x4, y4) + edge_length(x2, y2, x3, y3)) / 2

    # Rotation is taken from the direction of the first edge (corner 1 -> 2).
    # NOTE(review): the angle is measured on the coordinates as passed in,
    # i.e. before the optional normalisation below.
    rotation = atan2(y2 - y1, x2 - x1)

    if img_width and img_height:
        mid_x = mid_x / img_width
        mid_y = mid_y / img_height
        box_width = box_width / img_width
        box_height = box_height / img_height

    return BoundingBox(
        center_x=mid_x,
        center_y=mid_y,
        width=box_width,
        height=box_height,
        rotation_z=rotation
    )
|
||||||
|
|
||||||
def merge_bounding_boxes(ocr_element_list):
|
def merge_bounding_boxes(ocr_element_list):
|
||||||
all_bboxes = [e.bounding_box for e in ocr_element_list]
|
all_corners = []
|
||||||
min_x = min(b.center_x - b.width / 2 for b in all_bboxes)
|
|
||||||
max_x = max(b.center_x + b.width / 2 for b in all_bboxes)
|
for element in ocr_element_list:
|
||||||
min_y = min(b.center_y - b.height / 2 for b in all_bboxes)
|
bbox = element.bounding_box
|
||||||
max_y = max(b.center_y + b.height / 2 for b in all_bboxes)
|
angle = bbox.rotation_z
|
||||||
|
hw = bbox.width / 2
|
||||||
|
hh = bbox.height / 2
|
||||||
|
|
||||||
|
if not angle:
|
||||||
|
corners = [
|
||||||
|
(bbox.center_x - hw, bbox.center_y - hh), # Top-left
|
||||||
|
(bbox.center_x + hw, bbox.center_y - hh), # Top-right
|
||||||
|
(bbox.center_x + hw, bbox.center_y + hh), # Bottom-right
|
||||||
|
(bbox.center_x - hw, bbox.center_y + hh) # Bottom-left
|
||||||
|
]
|
||||||
|
all_corners.extend(corners)
|
||||||
|
else:
|
||||||
|
local_corners = [
|
||||||
|
(-hw, -hh), # Top-left
|
||||||
|
( hw, -hh), # Top-right
|
||||||
|
( hw, hh), # Bottom-right
|
||||||
|
(-hw, hh) # Bottom-left
|
||||||
|
]
|
||||||
|
|
||||||
|
# Rotate and translate corners
|
||||||
|
cos_angle = cos(angle)
|
||||||
|
sin_angle = sin(angle)
|
||||||
|
|
||||||
|
for x_local, y_local in local_corners:
|
||||||
|
x_rotated = x_local * cos_angle - y_local * sin_angle
|
||||||
|
y_rotated = x_local * sin_angle + y_local * cos_angle
|
||||||
|
x_global = bbox.center_x + x_rotated
|
||||||
|
y_global = bbox.center_y + y_rotated
|
||||||
|
all_corners.append((x_global, y_global))
|
||||||
|
|
||||||
|
xs, ys = zip(*all_corners)
|
||||||
|
min_x, max_x = min(xs), max(xs)
|
||||||
|
min_y, max_y = min(ys), max(ys)
|
||||||
|
|
||||||
return BoundingBox(
|
return BoundingBox(
|
||||||
center_x=(min_x + max_x) / 2,
|
center_x=(min_x + max_x) / 2,
|
||||||
@@ -205,7 +271,6 @@ def merge_bounding_boxes(ocr_element_list):
|
|||||||
height=max_y - min_y
|
height=max_y - min_y
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MangaOcr:
|
class MangaOcr:
|
||||||
name = 'mangaocr'
|
name = 'mangaocr'
|
||||||
readable_name = 'Manga OCR'
|
readable_name = 'Manga OCR'
|
||||||
@@ -549,19 +614,13 @@ class Bing:
|
|||||||
self.available = True
|
self.available = True
|
||||||
logger.info('Bing ready')
|
logger.info('Bing ready')
|
||||||
|
|
||||||
def _convert_bbox(self, quad):
    """Convert a quad given as named corner dicts into a ``BoundingBox``.

    ``quad`` is indexed with the keys ``topLeft``, ``topRight``,
    ``bottomRight`` and ``bottomLeft``; each entry must provide ``x`` and
    ``y``. The corners are forwarded in that order to
    ``quad_to_bounding_box`` without image dimensions, so no rescaling is
    applied here.
    # NOTE(review): presumably the incoming coordinates are already
    # normalised - confirm against the caller.
    """
    flat_coords = []
    for corner_name in ('topLeft', 'topRight', 'bottomRight', 'bottomLeft'):
        corner = quad[corner_name]
        flat_coords.append(corner['x'])
        flat_coords.append(corner['y'])
    return quad_to_bounding_box(*flat_coords)
|
|
||||||
|
|
||||||
def _to_generic_result(self, response, img_width, img_height, og_img_width, og_img_height):
|
def _to_generic_result(self, response, img_width, img_height, og_img_width, og_img_height):
|
||||||
paragraphs = []
|
paragraphs = []
|
||||||
@@ -586,19 +645,19 @@ class Bing:
|
|||||||
for w in l.get('words', []):
|
for w in l.get('words', []):
|
||||||
word = Word(
|
word = Word(
|
||||||
text=w.get('text', ''),
|
text=w.get('text', ''),
|
||||||
bounding_box=self._quad_to_center_bbox(w['boundingBox'])
|
bounding_box=self._convert_bbox(w['boundingBox'])
|
||||||
)
|
)
|
||||||
words.append(word)
|
words.append(word)
|
||||||
|
|
||||||
line = Line(
|
line = Line(
|
||||||
text=l.get('text', ''),
|
text=l.get('text', ''),
|
||||||
bounding_box=self._quad_to_center_bbox(l['boundingBox']),
|
bounding_box=self._convert_bbox(l['boundingBox']),
|
||||||
words=words
|
words=words
|
||||||
)
|
)
|
||||||
lines.append(line)
|
lines.append(line)
|
||||||
|
|
||||||
paragraph = Paragraph(
|
paragraph = Paragraph(
|
||||||
bounding_box=self._quad_to_center_bbox(p['boundingBox']),
|
bounding_box=self._convert_bbox(p['boundingBox']),
|
||||||
lines=lines
|
lines=lines
|
||||||
)
|
)
|
||||||
paragraphs.append(paragraph)
|
paragraphs.append(paragraph)
|
||||||
@@ -870,11 +929,11 @@ class AppleLiveText:
|
|||||||
if self.result == None:
|
if self.result == None:
|
||||||
return (False, 'Unknown error!')
|
return (False, 'Unknown error!')
|
||||||
|
|
||||||
ocr_response = OcrResult(
|
ocr_result = OcrResult(
|
||||||
image_properties=ImageProperties(width=img.width, height=img.height),
|
image_properties=ImageProperties(width=img.width, height=img.height),
|
||||||
paragraphs=self.result
|
paragraphs=self.result
|
||||||
)
|
)
|
||||||
x = (True, ocr_response)
|
x = (True, ocr_result)
|
||||||
|
|
||||||
if is_path:
|
if is_path:
|
||||||
img.close()
|
img.close()
|
||||||
@@ -1070,21 +1129,13 @@ class OneOCR:
|
|||||||
except:
|
except:
|
||||||
logger.warning('Error reading URL from config, OneOCR will not work!')
|
logger.warning('Error reading URL from config, OneOCR will not work!')
|
||||||
|
|
||||||
def _convert_bbox(self, rect, img_width, img_height):
    """Convert a rect with ``x1..x4`` / ``y1..y4`` keys into a ``BoundingBox``.

    The four corners are read from ``rect`` in numeric order and handed to
    ``quad_to_bounding_box`` together with the image dimensions, which that
    helper uses to scale the result.
    """
    key_pairs = (('x1', 'y1'), ('x2', 'y2'), ('x3', 'y3'), ('x4', 'y4'))
    flat_coords = [rect[key] for pair in key_pairs for key in pair]
    return quad_to_bounding_box(*flat_coords, img_width, img_height)
|
||||||
|
|
||||||
def _to_generic_result(self, response, img_width, img_height, og_img_width, og_img_height):
|
def _to_generic_result(self, response, img_width, img_height, og_img_width, og_img_height):
|
||||||
@@ -1094,13 +1145,13 @@ class OneOCR:
|
|||||||
for i, w in enumerate(l.get('words', [])):
|
for i, w in enumerate(l.get('words', [])):
|
||||||
word = Word(
|
word = Word(
|
||||||
text=w.get('text', ''),
|
text=w.get('text', ''),
|
||||||
bounding_box=self._pixel_quad_to_center_bbox(w['bounding_rect'], img_width, img_height)
|
bounding_box=self._convert_bbox(w['bounding_rect'], img_width, img_height)
|
||||||
)
|
)
|
||||||
words.append(word)
|
words.append(word)
|
||||||
|
|
||||||
line = Line(
|
line = Line(
|
||||||
text=l.get('text', ''),
|
text=l.get('text', ''),
|
||||||
bounding_box=self._pixel_quad_to_center_bbox(l['bounding_rect'], img_width, img_height),
|
bounding_box=self._convert_bbox(l['bounding_rect'], img_width, img_height),
|
||||||
words=words
|
words=words
|
||||||
)
|
)
|
||||||
lines.append(line)
|
lines.append(line)
|
||||||
@@ -1147,8 +1198,8 @@ class OneOCR:
|
|||||||
if 'error' in raw_res:
|
if 'error' in raw_res:
|
||||||
return (False, raw_res['error'])
|
return (False, raw_res['error'])
|
||||||
|
|
||||||
ocr_response = self._to_generic_result(raw_res, img_width, img_height, img.width, img.height)
|
ocr_result = self._to_generic_result(raw_res, img_width, img_height, img.width, img.height)
|
||||||
x = (True, ocr_response)
|
x = (True, ocr_result)
|
||||||
|
|
||||||
if is_path:
|
if is_path:
|
||||||
img.close()
|
img.close()
|
||||||
@@ -1262,22 +1313,13 @@ class EasyOCR:
|
|||||||
self.available = True
|
self.available = True
|
||||||
logger.info('EasyOCR ready')
|
logger.info('EasyOCR ready')
|
||||||
|
|
||||||
def _convert_bbox(self, rect, img_width, img_height):
    """Convert a sequence of four ``(x, y)`` points into a ``BoundingBox``.

    Each of the first four points of ``rect`` has its first two entries cast
    to ``float``; the resulting corners are passed, in order, to
    ``quad_to_bounding_box`` along with the image dimensions.
    """
    flat_coords = []
    for index in range(4):
        point = rect[index]
        flat_coords.append(float(point[0]))
        flat_coords.append(float(point[1]))
    return quad_to_bounding_box(*flat_coords, img_width, img_height)
|
|
||||||
|
|
||||||
def _to_generic_result(self, response, img_width, img_height):
|
def _to_generic_result(self, response, img_width, img_height):
|
||||||
lines = []
|
lines = []
|
||||||
@@ -1286,7 +1328,7 @@ class EasyOCR:
|
|||||||
quad_coords = detection[0]
|
quad_coords = detection[0]
|
||||||
text = detection[1]
|
text = detection[1]
|
||||||
|
|
||||||
bbox = self._pixel_quad_to_center_bbox(quad_coords, img_width, img_height)
|
bbox = self._convert_bbox(quad_coords, img_width, img_height)
|
||||||
word = Word(text=text, bounding_box=bbox)
|
word = Word(text=text, bounding_box=bbox)
|
||||||
line = Line(bounding_box=bbox, words=[word], text=text)
|
line = Line(bounding_box=bbox, words=[word], text=text)
|
||||||
lines.append(line)
|
lines.append(line)
|
||||||
@@ -1365,22 +1407,13 @@ class RapidOCR:
|
|||||||
else:
|
else:
|
||||||
return LangRec.LATIN
|
return LangRec.LATIN
|
||||||
|
|
||||||
def _convert_bbox(self, rect, img_width, img_height):
    """Convert a sequence of four ``(x, y)`` points into a ``BoundingBox``.

    Each of the first four points of ``rect`` has its first two entries cast
    to ``float``; the resulting corners are passed, in order, to
    ``quad_to_bounding_box`` along with the image dimensions.
    """
    flat_coords = []
    for index in range(4):
        point = rect[index]
        flat_coords.append(float(point[0]))
        flat_coords.append(float(point[1]))
    return quad_to_bounding_box(*flat_coords, img_width, img_height)
|
|
||||||
|
|
||||||
def _to_generic_result(self, response, img_width, img_height):
|
def _to_generic_result(self, response, img_width, img_height):
|
||||||
lines = []
|
lines = []
|
||||||
@@ -1388,7 +1421,7 @@ class RapidOCR:
|
|||||||
for i in range(len(response.boxes)):
|
for i in range(len(response.boxes)):
|
||||||
box = response.boxes[i]
|
box = response.boxes[i]
|
||||||
text = response.txts[i]
|
text = response.txts[i]
|
||||||
bbox = self._pixel_quad_to_center_bbox(box, img_width, img_height)
|
bbox = self._convert_bbox(box, img_width, img_height)
|
||||||
word = Word(text=text, bounding_box=bbox)
|
word = Word(text=text, bounding_box=bbox)
|
||||||
line = Line(bounding_box=bbox, words=[word], text=text)
|
line = Line(bounding_box=bbox, words=[word], text=text)
|
||||||
lines.append(line)
|
lines.append(line)
|
||||||
|
|||||||
Reference in New Issue
Block a user