Fix Bing (thanks to https://github.com/HIllya51/LunaTranslator for the China fix), fix Bing image size limits, and make resizing more robust for OCRSpace too
This commit is contained in:
130
owocr/ocr.py
130
owocr/ocr.py
@@ -6,6 +6,7 @@ import sys
|
|||||||
import platform
|
import platform
|
||||||
import logging
|
import logging
|
||||||
from math import sqrt
|
from math import sqrt
|
||||||
|
import json
|
||||||
import base64
|
import base64
|
||||||
from urllib.parse import urlparse, parse_qs
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
|
||||||
@@ -90,15 +91,15 @@ def post_process(text):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
    """Encode a PIL image and return the encoded bytes.

    PNG output goes through the fpng encoder when ``optimized_png_encode`` is
    truthy and ``optimize`` was not requested. Every other case is written by
    ``img.save`` into an in-memory buffer with the given options.
    """
    use_fpng = img_format == 'png' and optimized_png_encode and not optimize
    if use_fpng:
        rgba_pixels = img.convert('RGBA').tobytes()
        return fpng_py.fpng_encode_image_to_memory(rgba_pixels, img.width, img.height)

    if img_format == 'jpeg':
        # Normalize to RGB before handing the image to the JPEG writer.
        img = img.convert('RGB')

    buffer = io.BytesIO()
    img.save(
        buffer,
        format=img_format,
        compress_level=png_compression,
        quality=jpeg_quality,
        optimize=optimize,
        subsampling=0,
    )
    return buffer.getvalue()
|
||||||
|
|
||||||
@@ -107,6 +108,36 @@ def pil_image_to_numpy_array(img):
|
|||||||
return np.array(img.convert('RGBA'))
|
return np.array(img.convert('RGBA'))
|
||||||
|
|
||||||
|
|
||||||
|
def limit_image_size(img, max_size):
    """Encode ``img`` so that the result is at most ``max_size`` bytes.

    Attempts, in order: PNG at full size, PNG of a downscaled copy, JPEG at
    full size, JPEG of the downscaled copy. Each JPEG attempt lowers the
    quality from 80 to 60 in steps of 5 and stops at the first fit.

    Returns:
        ``(encoded_bytes, fmt)`` with ``fmt`` being ``'png'`` or ``'jpeg'``,
        or ``(False, '')`` when no attempt fits within ``max_size``.
    """
    png_bytes = pil_image_to_bytes(img)
    if len(png_bytes) <= max_size:
        return png_bytes, 'png'

    # Shrink more aggressively when either side is larger than 2000 px.
    scaling_factor = 0.60 if any(x > 2000 for x in img.size) else 0.75
    # Clamp to 1 px: int() truncation would otherwise give a zero dimension
    # for a 1-px-wide/high image, which resize() does not accept.
    new_w = max(1, int(img.width * scaling_factor))
    new_h = max(1, int(img.height * scaling_factor))
    resized_img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)

    resized_png_bytes = pil_image_to_bytes(resized_img)
    if len(resized_png_bytes) <= max_size:
        return resized_png_bytes, 'png'

    # Lossy fallback: full-size image first, then the downscaled copy.
    for candidate in (img, resized_img):
        for jpeg_quality in range(80, 55, -5):
            jpeg_bytes = pil_image_to_bytes(candidate, 'jpeg', jpeg_quality=jpeg_quality, optimize=True)
            if len(jpeg_bytes) <= max_size:
                return jpeg_bytes, 'jpeg'

    return False, ''
|
||||||
|
|
||||||
|
|
||||||
class MangaOcr:
|
class MangaOcr:
|
||||||
name = 'mangaocr'
|
name = 'mangaocr'
|
||||||
readable_name = 'Manga OCR'
|
readable_name = 'Manga OCR'
|
||||||
@@ -386,6 +417,10 @@ class Bing:
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||||
|
|
||||||
|
img_bytes = self._preprocess(img)
|
||||||
|
if not img_bytes:
|
||||||
|
return (False, 'Image is too big!')
|
||||||
|
|
||||||
upload_url = 'https://www.bing.com/images/search?view=detailv2&iss=sbiupload'
|
upload_url = 'https://www.bing.com/images/search?view=detailv2&iss=sbiupload'
|
||||||
upload_headers = {
|
upload_headers = {
|
||||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||||
@@ -397,22 +432,27 @@ class Bing:
|
|||||||
files = {
|
files = {
|
||||||
'imgurl': (None, ''),
|
'imgurl': (None, ''),
|
||||||
'cbir': (None, 'sbi'),
|
'cbir': (None, 'sbi'),
|
||||||
'imageBin': (None, self._preprocess(img))
|
'imageBin': (None, img_bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
for _ in range(2):
|
||||||
res = self.requests_session.post(upload_url, headers=upload_headers, files=files, timeout=20, allow_redirects=False)
|
api_host = urlparse(upload_url).netloc
|
||||||
except requests.exceptions.Timeout:
|
try:
|
||||||
return (False, 'Request timeout!')
|
res = self.requests_session.post(upload_url, headers=upload_headers, files=files, timeout=20, allow_redirects=False)
|
||||||
except requests.exceptions.ConnectionError:
|
except requests.exceptions.Timeout:
|
||||||
return (False, 'Connection error!')
|
return (False, 'Request timeout!')
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
return (False, 'Connection error!')
|
||||||
|
|
||||||
if res.status_code != 302:
|
if res.status_code != 302:
|
||||||
return (False, 'Unknown error!')
|
return (False, 'Unknown error!')
|
||||||
|
|
||||||
redirect_url = res.headers.get('Location')
|
redirect_url = res.headers.get('Location')
|
||||||
if not redirect_url:
|
if not redirect_url:
|
||||||
return (False, 'Error getting redirect URL!')
|
return (False, 'Error getting redirect URL!')
|
||||||
|
if not redirect_url.startswith('https://'):
|
||||||
|
break
|
||||||
|
upload_url = redirect_url
|
||||||
|
|
||||||
parsed_url = urlparse(redirect_url)
|
parsed_url = urlparse(redirect_url)
|
||||||
query_params = parse_qs(parsed_url.query)
|
query_params = parse_qs(parsed_url.query)
|
||||||
@@ -422,7 +462,7 @@ class Bing:
|
|||||||
return (False, 'Error getting token!')
|
return (False, 'Error getting token!')
|
||||||
image_insights_token = image_insights_token[0]
|
image_insights_token = image_insights_token[0]
|
||||||
|
|
||||||
api_url = 'https://www.bing.com/images/api/custom/knowledge'
|
api_url = f'https://{api_host}/images/api/custom/knowledge'
|
||||||
api_headers = {
|
api_headers = {
|
||||||
'accept': '*/*',
|
'accept': '*/*',
|
||||||
'accept-language': 'ja-JP;q=0.6,ja;q=0.5',
|
'accept-language': 'ja-JP;q=0.6,ja;q=0.5',
|
||||||
@@ -450,35 +490,44 @@ class Bing:
|
|||||||
|
|
||||||
data = res.json()
|
data = res.json()
|
||||||
|
|
||||||
|
res = ''
|
||||||
text_tag = None
|
text_tag = None
|
||||||
for tag in data['tags']:
|
for tag in data['tags']:
|
||||||
if tag.get('displayName') == '##TextRecognition':
|
if tag.get('displayName') == '##TextRecognition':
|
||||||
text_tag = tag
|
text_tag = tag
|
||||||
break
|
break
|
||||||
if not text_tag:
|
if text_tag:
|
||||||
return (False, 'No ##TextRecognition tag in response!')
|
text_action = None
|
||||||
|
for action in text_tag['actions']:
|
||||||
text_action = None
|
if action.get('_type') == 'ImageKnowledge/TextRecognitionAction':
|
||||||
for action in text_tag['actions']:
|
text_action = action
|
||||||
if action.get('_type') == 'ImageKnowledge/TextRecognitionAction':
|
break
|
||||||
text_action = action
|
if text_action:
|
||||||
break
|
regions = text_action['data'].get('regions', [])
|
||||||
if not text_action:
|
for region in regions:
|
||||||
return (False, 'No TextRecognitionAction action in response!')
|
for line in region.get('lines', []):
|
||||||
|
res += line['text'] + '\n'
|
||||||
regions = text_action['data'].get('regions', [])
|
|
||||||
|
|
||||||
res = ''
|
|
||||||
for region in regions:
|
|
||||||
for line in region.get('lines', []):
|
|
||||||
res += line['text'] + '\n'
|
|
||||||
|
|
||||||
x = (True, res)
|
x = (True, res)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _preprocess(self, img):
    """Return ``img`` as a base64-encoded string ready for upload, or ``None``.

    The image is first downscaled so that neither side exceeds
    ``max_pixel_size``, then encoded by ``limit_image_size`` to fit within
    ``max_byte_size`` bytes. ``None`` is returned when no encoding fits.
    """
    # NOTE(review): presumably Bing's upload limits -- confirm against the service.
    max_pixel_size = 4000
    max_byte_size = 767772

    if any(x > max_pixel_size for x in img.size):
        # Use the smaller ratio so the longer side lands on max_pixel_size.
        # Taking the larger ratio would enlarge non-square images
        # (e.g. 8000x2000 -> factor 2.0) instead of shrinking them.
        resize_factor = min(max_pixel_size / img.width, max_pixel_size / img.height)
        # Clamp to 1 px so extreme aspect ratios never produce a zero dimension.
        new_w = max(1, int(img.width * resize_factor))
        new_h = max(1, int(img.height * resize_factor))
        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)

    img_bytes, _ = limit_image_size(img, max_byte_size)
    if not img_bytes:
        return None

    return base64.b64encode(img_bytes).decode('utf-8')
|
||||||
|
|
||||||
class AppleVision:
|
class AppleVision:
|
||||||
name = 'avision'
|
name = 'avision'
|
||||||
@@ -811,6 +860,7 @@ class OCRSpace:
|
|||||||
def __init__(self, config={}):
|
def __init__(self, config={}):
|
||||||
try:
|
try:
|
||||||
self.api_key = config['api_key']
|
self.api_key = config['api_key']
|
||||||
|
self.max_byte_size = config.get('file_size_limit', 1000000)
|
||||||
self.available = True
|
self.available = True
|
||||||
logger.info('OCRSpace ready')
|
logger.info('OCRSpace ready')
|
||||||
except:
|
except:
|
||||||
@@ -824,11 +874,15 @@ class OCRSpace:
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||||
|
|
||||||
|
img_bytes, img_extension = self._preprocess(img)
|
||||||
|
if not img_bytes:
|
||||||
|
return (False, 'Image is too big!')
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
'apikey': self.api_key,
|
'apikey': self.api_key,
|
||||||
'language': 'jpn'
|
'language': 'jpn'
|
||||||
}
|
}
|
||||||
files = {'file': ('image.jpg', self._preprocess(img), 'image/jpeg')}
|
files = {'file': ('image.' + img_extension, img_bytes, 'image/' + img_extension)}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
res = requests.post('https://api.ocr.space/parse/image', data=data, files=files, timeout=20)
|
res = requests.post('https://api.ocr.space/parse/image', data=data, files=files, timeout=20)
|
||||||
@@ -851,5 +905,5 @@ class OCRSpace:
|
|||||||
x = (True, res)
|
x = (True, res)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _preprocess(self, img):
    """Encode ``img`` within ``self.max_byte_size`` bytes.

    Delegates to ``limit_image_size`` and hands back its
    ``(bytes, format)`` pair unchanged.
    """
    encoded = limit_image_size(img, self.max_byte_size)
    return encoded
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "owocr"
|
name = "owocr"
|
||||||
version = "1.12.1"
|
version = "1.12.2"
|
||||||
description = "Japanese OCR"
|
description = "Japanese OCR"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
|||||||
Reference in New Issue
Block a user