Replace PaddleOCR with RapidOCR, handle errors when offline

This commit is contained in:
AuroraWright
2024-01-30 11:11:07 +01:00
parent 52c822272e
commit c7565b0eed
5 changed files with 105 additions and 58 deletions

View File

@@ -11,7 +11,7 @@ This has been tested with Python 3.11. Newer/older versions might work. It can b
 ## Local providers
 - [Manga OCR](https://github.com/kha-white/manga-ocr): refer to the readme for installation ("m" key)
 - [EasyOCR](https://github.com/JaidedAI/EasyOCR): refer to the readme for installation ("e" key)
-- [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR): refer to the [wiki](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.7/doc/doc_en/quickstart_en.md) for installation ("o" key)
+- [RapidOCR](https://github.com/RapidAI/RapidOCR): refer to the readme for installation ("r" key)
 - Apple Vision framework: this will work on macOS Ventura or later. In my experience, the best of the local providers for horizontal text ("a" key)
 - WinRT OCR: this will work on Windows 10 or later if winocr (`pip install winocr`) is installed. It can also be used by installing winocr on a Windows virtual machine and running the server (`winocr_serve`), installing requests (`pip install requests`) and specifying the IP address of the Windows VM/machine in the config file (see below) ("w" key)

View File

@@ -5,6 +5,7 @@ from pathlib import Path
import time import time
import sys import sys
import platform import platform
import logging
from math import sqrt from math import sqrt
import jaconv import jaconv
@@ -27,6 +28,7 @@ except ImportError:
try: try:
from google.cloud import vision from google.cloud import vision
from google.oauth2 import service_account from google.oauth2 import service_account
from google.api_core.exceptions import ServiceUnavailable
except ImportError: except ImportError:
pass pass
@@ -34,6 +36,7 @@ try:
from azure.cognitiveservices.vision.computervision import ComputerVisionClient from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials from msrest.authentication import CognitiveServicesCredentials
from msrest.exceptions import ClientRequestError
except ImportError: except ImportError:
pass pass
@@ -43,7 +46,7 @@ except ImportError:
pass pass
try: try:
from paddleocr import PaddleOCR as POCR from rapidocr_onnxruntime import RapidOCR as ROCR
except ImportError: except ImportError:
pass pass
@@ -96,7 +99,7 @@ class MangaOcr:
else: else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}') raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
x = self.model(img) x = (True, self.model(img))
return x return x
class GoogleVision: class GoogleVision:
@@ -129,9 +132,14 @@ class GoogleVision:
image_bytes = self._preprocess(img) image_bytes = self._preprocess(img)
image = vision.Image(content=image_bytes) image = vision.Image(content=image_bytes)
response = self.client.text_detection(image=image) try:
response = self.client.text_detection(image=image)
except ServiceUnavailable:
return (False, 'Connection error!')
except:
return (False, 'Unknown error!')
texts = response.text_annotations texts = response.text_annotations
x = post_process(texts[0].description) x = (True, post_process(texts[0].description))
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -168,22 +176,30 @@ class GoogleLens:
try: try:
res = requests.post(url, files=files, timeout=20) res = requests.post(url, files=files, timeout=20)
except requests.exceptions.Timeout: except requests.exceptions.Timeout:
return 'Request timeout!' return (False, 'Request timeout!')
except requests.exceptions.ConnectionError:
return (False, 'Connection error!')
x = '' if res.status_code != 200:
if res.status_code == 200: return (False, 'Unknown error!')
regex = re.compile(r">AF_initDataCallback\(({key: 'ds:1'.*?)\);</script>")
match = regex.search(res.text)
if match != None:
lens_object = pyjson5.loads(match.group(1))
if not 'errorHasStatus' in lens_object:
text = lens_object['data'][3][4][0]
if len(text) > 0:
lines = text[0]
for line in lines:
x += line + ' '
x = post_process(x)
regex = re.compile(r">AF_initDataCallback\(({key: 'ds:1'.*?)\);</script>")
match = regex.search(res.text)
if match == None:
return (False, 'Regex error!')
lens_object = pyjson5.loads(match.group(1))
if 'errorHasStatus' in lens_object:
return (False, 'Unknown Lens error!')
res = ''
text = lens_object['data'][3][4][0]
if len(text) > 0:
lines = text[0]
for line in lines:
res += line + ' '
x = (True, post_process(res))
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -238,9 +254,11 @@ class AppleVision:
for result in req.results(): for result in req.results():
res += result.text() + ' ' res += result.text() + ' '
req.dealloc() req.dealloc()
x = (True, post_process(res))
else:
x = (False, 'Unknown error!')
handler.dealloc() handler.dealloc()
x = post_process(res)
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -289,11 +307,16 @@ class WinRTOCR:
try: try:
res = requests.post(self.url, params=params, data=self._preprocess(img), timeout=3) res = requests.post(self.url, params=params, data=self._preprocess(img), timeout=3)
except requests.exceptions.Timeout: except requests.exceptions.Timeout:
return 'Request timeout!' return (False, 'Request timeout!')
except requests.exceptions.ConnectionError:
return (False, 'Connection error!')
if res.status_code != 200:
return (False, 'Unknown error!')
res = json.loads(res.text)['text'] res = json.loads(res.text)['text']
x = post_process(res) x = (True, post_process(res))
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -328,24 +351,33 @@ class AzureComputerVision:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}') raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
image_io = self._preprocess(img) image_io = self._preprocess(img)
read_response = self.client.read_in_stream(image_io, raw=True) logging.getLogger('urllib3.connectionpool').disabled = True
read_operation_location = read_response.headers['Operation-Location'] try:
operation_id = read_operation_location.split('/')[-1] read_response = self.client.read_in_stream(image_io, raw=True)
while True: read_operation_location = read_response.headers['Operation-Location']
read_result = self.client.get_read_result(operation_id) operation_id = read_operation_location.split('/')[-1]
if read_result.status.lower() not in ['notstarted', 'running']:
break while True:
time.sleep(0.3) read_result = self.client.get_read_result(operation_id)
if read_result.status.lower() not in [OperationStatusCodes.not_started, OperationStatusCodes.running]:
break
time.sleep(0.3)
except ClientRequestError:
return (False, 'Connection error!')
except:
return (False, 'Unknown error!')
res = '' res = ''
if read_result.status == OperationStatusCodes.succeeded: if read_result.status == OperationStatusCodes.succeeded:
for text_result in read_result.analyze_result.read_results: for text_result in read_result.analyze_result.read_results:
for line in text_result.lines: for line in text_result.lines:
res += line.text + ' ' res += line.text + ' '
else:
return (False, 'Unknown error!')
x = post_process(res) x = (True, post_process(res))
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -382,7 +414,7 @@ class EasyOCR:
for text in read_result: for text in read_result:
res += text + ' ' res += text + ' '
x = post_process(res) x = (True, post_process(res))
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -390,20 +422,29 @@ class EasyOCR:
img.save(image_bytes, format='png') img.save(image_bytes, format='png')
return image_bytes.getvalue() return image_bytes.getvalue()
class PaddleOCR: class RapidOCR:
name = 'paddleocr' name = 'rapidocr'
readable_name = 'PaddleOCR' readable_name = 'RapidOCR'
key = 'o' key = 'r'
available = False available = False
def __init__(self): def __init__(self):
if 'paddleocr' not in sys.modules: if 'rapidocr_onnxruntime' not in sys.modules:
logger.warning('paddleocr not available, PaddleOCR will not work!') logger.warning('rapidocr_onnxruntime not available, RapidOCR will not work!')
else: else:
logger.info('Loading PaddleOCR model') rapidocr_model_file = os.path.join(os.path.expanduser('~'),'.cache','rapidocr_japan_PP-OCRv4_rec_infer.onnx')
self.model = POCR(use_angle_cls=True, show_log=False, lang='japan') if not os.path.isfile(rapidocr_model_file):
logger.info('Downloading RapidOCR model')
try:
urllib.request.urlretrieve('https://raw.githubusercontent.com/AuroraWright/owocr/master/rapidocr_japan_PP-OCRv4_rec_infer.onnx', rapidocr_model_file)
except Exception as inst:
logger.warning('Download failed. RapidOCR will not work!')
return
logger.info('Loading RapidOCR model')
self.model = ROCR(rec_model_path=rapidocr_model_file)
self.available = True self.available = True
logger.info('PaddleOCR ready') logger.info('RapidOCR ready')
def __call__(self, img_or_path): def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path): if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
@@ -413,15 +454,16 @@ class PaddleOCR:
else: else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}') raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
logging.getLogger().disabled = True
res = '' res = ''
read_results = self.model.ocr(self._preprocess(img), cls=True) read_results, elapsed = self.model(self._preprocess(img))
for read_result in read_results: if read_results:
if read_result: for read_result in read_results:
for text in read_result: res += read_result[1] + ' '
res += text[1][0] + ' '
x = post_process(res) x = (True, post_process(res))
return x return x
def _preprocess(self, img): def _preprocess(self, img):
return np.array(img.convert('RGB')) return np.array(img.convert('RGB'))

View File

@@ -93,6 +93,8 @@ class WebsocketServerThread(threading.Thread):
await websocket.send('False') await websocket.send('False')
except websockets.exceptions.ConnectionClosedOK: except websockets.exceptions.ConnectionClosedOK:
pass pass
except websockets.exceptions.ConnectionClosedError:
pass
finally: finally:
self.clients.remove(websocket) self.clients.remove(websocket)
@@ -105,7 +107,7 @@ class WebsocketServerThread(threading.Thread):
def run(self): def run(self):
asyncio.set_event_loop(self.loop) asyncio.set_event_loop(self.loop)
start_server = websockets.serve(self.server_handler, '0.0.0.0', config.get_general('websocket_port'), max_size=50000000) start_server = websockets.serve(self.server_handler, '0.0.0.0', config.get_general('websocket_port'), max_size=1000000000)
self.server = start_server self.server = start_server
self.loop.run_until_complete(start_server) self.loop.run_until_complete(start_server)
self.loop.run_forever() self.loop.run_forever()
@@ -244,17 +246,20 @@ def are_images_identical(img1, img2):
def process_and_write_results(engine_instance, img_or_path, write_to): def process_and_write_results(engine_instance, img_or_path, write_to):
t0 = time.time() t0 = time.time()
text = engine_instance(img_or_path) res, text = engine_instance(img_or_path)
t1 = time.time() t1 = time.time()
engine_color = config.get_general('engine_color') engine_color = config.get_general('engine_color')
logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}') if res:
if config.get_general('notifications'): logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
notification = Notify() if config.get_general('notifications'):
notification.application_name = 'owocr' notification = Notify()
notification.title = 'Text recognized:' notification.application_name = 'owocr'
notification.message = text notification.title = 'Text recognized:'
notification.send(block=False) notification.message = text
notification.send(block=False)
else:
logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {t1 - t0:0.03f}s: {text}')
if write_to == 'websocket': if write_to == 'websocket':
websocket_server_thread.send_text(text) websocket_server_thread.send_text(text)
@@ -294,7 +299,7 @@ def run(read_from=None,
:param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "screencapture", or a path to a directory. :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "screencapture", or a path to a directory.
:param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file. :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
:param delay_secs: How often to check for new images, in seconds. :param delay_secs: How often to check for new images, in seconds.
:param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "azure", "winrtocr", "easyocr", "paddleocr". :param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "azure", "winrtocr", "easyocr", "rapidocr".
:param pause_at_startup: Pause at startup. :param pause_at_startup: Pause at startup.
:param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string). :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
:param delete_images: Delete image files after processing when reading from a directory. :param delete_images: Delete image files after processing when reading from a directory.

View File

@@ -1,5 +1,5 @@
 [general]
-;engines = avision,glens,gvision,azure,mangaocr,winrtocr,easyocr,paddleocr
+;engines = avision,glens,gvision,azure,mangaocr,winrtocr,easyocr,rapidocr
 ;engine = glens
 ;read_from = clipboard
 ;write_to = clipboard

Binary file not shown.