Replace PaddleOCR with RapidOCR, handle errors when offline
This commit is contained in:
@@ -11,7 +11,7 @@ This has been tested with Python 3.11. Newer/older versions might work. It can b
|
|||||||
## Local providers
|
## Local providers
|
||||||
- [Manga OCR](https://github.com/kha-white/manga-ocr): refer to the readme for installation ("m" key)
|
- [Manga OCR](https://github.com/kha-white/manga-ocr): refer to the readme for installation ("m" key)
|
||||||
- [EasyOCR](https://github.com/JaidedAI/EasyOCR): refer to the readme for installation ("e" key)
|
- [EasyOCR](https://github.com/JaidedAI/EasyOCR): refer to the readme for installation ("e" key)
|
||||||
- [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR): refer to the [wiki](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.7/doc/doc_en/quickstart_en.md) for installation ("o" key)
|
- [RapidOCR](https://github.com/RapidAI/RapidOCR): refer to the readme for installation ("r" key)
|
||||||
- Apple Vision framework: this will work on macOS Ventura or later. In my experience, this is the best of the local providers for horizontal text ("a" key)
|
- Apple Vision framework: this will work on macOS Ventura or later. In my experience, this is the best of the local providers for horizontal text ("a" key)
|
||||||
- WinRT OCR: this will work on Windows 10 or later if winocr (`pip install winocr`) is installed. It can also be used by installing winocr on a Windows virtual machine and running the server (`winocr_serve`), installing requests (`pip install requests`) and specifying the IP address of the Windows VM/machine in the config file (see below) ("w" key)
|
- WinRT OCR: this will work on Windows 10 or later if winocr (`pip install winocr`) is installed. It can also be used by installing winocr on a Windows virtual machine and running the server (`winocr_serve`), installing requests (`pip install requests`) and specifying the IP address of the Windows VM/machine in the config file (see below) ("w" key)
|
||||||
|
|
||||||
|
|||||||
102
owocr/ocr.py
102
owocr/ocr.py
@@ -5,6 +5,7 @@ from pathlib import Path
|
|||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
import platform
|
import platform
|
||||||
|
import logging
|
||||||
from math import sqrt
|
from math import sqrt
|
||||||
|
|
||||||
import jaconv
|
import jaconv
|
||||||
@@ -27,6 +28,7 @@ except ImportError:
|
|||||||
try:
|
try:
|
||||||
from google.cloud import vision
|
from google.cloud import vision
|
||||||
from google.oauth2 import service_account
|
from google.oauth2 import service_account
|
||||||
|
from google.api_core.exceptions import ServiceUnavailable
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -34,6 +36,7 @@ try:
|
|||||||
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
|
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
|
||||||
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
|
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
|
||||||
from msrest.authentication import CognitiveServicesCredentials
|
from msrest.authentication import CognitiveServicesCredentials
|
||||||
|
from msrest.exceptions import ClientRequestError
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -43,7 +46,7 @@ except ImportError:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from paddleocr import PaddleOCR as POCR
|
from rapidocr_onnxruntime import RapidOCR as ROCR
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -96,7 +99,7 @@ class MangaOcr:
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||||
|
|
||||||
x = self.model(img)
|
x = (True, self.model(img))
|
||||||
return x
|
return x
|
||||||
|
|
||||||
class GoogleVision:
|
class GoogleVision:
|
||||||
@@ -129,9 +132,14 @@ class GoogleVision:
|
|||||||
|
|
||||||
image_bytes = self._preprocess(img)
|
image_bytes = self._preprocess(img)
|
||||||
image = vision.Image(content=image_bytes)
|
image = vision.Image(content=image_bytes)
|
||||||
|
try:
|
||||||
response = self.client.text_detection(image=image)
|
response = self.client.text_detection(image=image)
|
||||||
|
except ServiceUnavailable:
|
||||||
|
return (False, 'Connection error!')
|
||||||
|
except:
|
||||||
|
return (False, 'Unknown error!')
|
||||||
texts = response.text_annotations
|
texts = response.text_annotations
|
||||||
x = post_process(texts[0].description)
|
x = (True, post_process(texts[0].description))
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
@@ -168,22 +176,30 @@ class GoogleLens:
|
|||||||
try:
|
try:
|
||||||
res = requests.post(url, files=files, timeout=20)
|
res = requests.post(url, files=files, timeout=20)
|
||||||
except requests.exceptions.Timeout:
|
except requests.exceptions.Timeout:
|
||||||
return 'Request timeout!'
|
return (False, 'Request timeout!')
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
return (False, 'Connection error!')
|
||||||
|
|
||||||
|
if res.status_code != 200:
|
||||||
|
return (False, 'Unknown error!')
|
||||||
|
|
||||||
x = ''
|
|
||||||
if res.status_code == 200:
|
|
||||||
regex = re.compile(r">AF_initDataCallback\(({key: 'ds:1'.*?)\);</script>")
|
regex = re.compile(r">AF_initDataCallback\(({key: 'ds:1'.*?)\);</script>")
|
||||||
match = regex.search(res.text)
|
match = regex.search(res.text)
|
||||||
if match != None:
|
if match == None:
|
||||||
|
return (False, 'Regex error!')
|
||||||
|
|
||||||
lens_object = pyjson5.loads(match.group(1))
|
lens_object = pyjson5.loads(match.group(1))
|
||||||
if not 'errorHasStatus' in lens_object:
|
if 'errorHasStatus' in lens_object:
|
||||||
|
return (False, 'Unknown Lens error!')
|
||||||
|
|
||||||
|
res = ''
|
||||||
text = lens_object['data'][3][4][0]
|
text = lens_object['data'][3][4][0]
|
||||||
if len(text) > 0:
|
if len(text) > 0:
|
||||||
lines = text[0]
|
lines = text[0]
|
||||||
for line in lines:
|
for line in lines:
|
||||||
x += line + ' '
|
res += line + ' '
|
||||||
x = post_process(x)
|
|
||||||
|
|
||||||
|
x = (True, post_process(res))
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
@@ -238,9 +254,11 @@ class AppleVision:
|
|||||||
for result in req.results():
|
for result in req.results():
|
||||||
res += result.text() + ' '
|
res += result.text() + ' '
|
||||||
req.dealloc()
|
req.dealloc()
|
||||||
|
x = (True, post_process(res))
|
||||||
|
else:
|
||||||
|
x = (False, 'Unknown error!')
|
||||||
|
|
||||||
handler.dealloc()
|
handler.dealloc()
|
||||||
x = post_process(res)
|
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
@@ -289,11 +307,16 @@ class WinRTOCR:
|
|||||||
try:
|
try:
|
||||||
res = requests.post(self.url, params=params, data=self._preprocess(img), timeout=3)
|
res = requests.post(self.url, params=params, data=self._preprocess(img), timeout=3)
|
||||||
except requests.exceptions.Timeout:
|
except requests.exceptions.Timeout:
|
||||||
return 'Request timeout!'
|
return (False, 'Request timeout!')
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
return (False, 'Connection error!')
|
||||||
|
|
||||||
|
if res.status_code != 200:
|
||||||
|
return (False, 'Unknown error!')
|
||||||
|
|
||||||
res = json.loads(res.text)['text']
|
res = json.loads(res.text)['text']
|
||||||
|
|
||||||
x = post_process(res)
|
x = (True, post_process(res))
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
@@ -328,6 +351,9 @@ class AzureComputerVision:
|
|||||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||||
|
|
||||||
image_io = self._preprocess(img)
|
image_io = self._preprocess(img)
|
||||||
|
logging.getLogger('urllib3.connectionpool').disabled = True
|
||||||
|
|
||||||
|
try:
|
||||||
read_response = self.client.read_in_stream(image_io, raw=True)
|
read_response = self.client.read_in_stream(image_io, raw=True)
|
||||||
|
|
||||||
read_operation_location = read_response.headers['Operation-Location']
|
read_operation_location = read_response.headers['Operation-Location']
|
||||||
@@ -335,17 +361,23 @@ class AzureComputerVision:
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
read_result = self.client.get_read_result(operation_id)
|
read_result = self.client.get_read_result(operation_id)
|
||||||
if read_result.status.lower() not in ['notstarted', 'running']:
|
if read_result.status.lower() not in [OperationStatusCodes.not_started, OperationStatusCodes.running]:
|
||||||
break
|
break
|
||||||
time.sleep(0.3)
|
time.sleep(0.3)
|
||||||
|
except ClientRequestError:
|
||||||
|
return (False, 'Connection error!')
|
||||||
|
except:
|
||||||
|
return (False, 'Unknown error!')
|
||||||
|
|
||||||
res = ''
|
res = ''
|
||||||
if read_result.status == OperationStatusCodes.succeeded:
|
if read_result.status == OperationStatusCodes.succeeded:
|
||||||
for text_result in read_result.analyze_result.read_results:
|
for text_result in read_result.analyze_result.read_results:
|
||||||
for line in text_result.lines:
|
for line in text_result.lines:
|
||||||
res += line.text + ' '
|
res += line.text + ' '
|
||||||
|
else:
|
||||||
|
return (False, 'Unknown error!')
|
||||||
|
|
||||||
x = post_process(res)
|
x = (True, post_process(res))
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
@@ -382,7 +414,7 @@ class EasyOCR:
|
|||||||
for text in read_result:
|
for text in read_result:
|
||||||
res += text + ' '
|
res += text + ' '
|
||||||
|
|
||||||
x = post_process(res)
|
x = (True, post_process(res))
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
@@ -390,20 +422,29 @@ class EasyOCR:
|
|||||||
img.save(image_bytes, format='png')
|
img.save(image_bytes, format='png')
|
||||||
return image_bytes.getvalue()
|
return image_bytes.getvalue()
|
||||||
|
|
||||||
class PaddleOCR:
|
class RapidOCR:
|
||||||
name = 'paddleocr'
|
name = 'rapidocr'
|
||||||
readable_name = 'PaddleOCR'
|
readable_name = 'RapidOCR'
|
||||||
key = 'o'
|
key = 'r'
|
||||||
available = False
|
available = False
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
if 'paddleocr' not in sys.modules:
|
if 'rapidocr_onnxruntime' not in sys.modules:
|
||||||
logger.warning('paddleocr not available, PaddleOCR will not work!')
|
logger.warning('rapidocr_onnxruntime not available, RapidOCR will not work!')
|
||||||
else:
|
else:
|
||||||
logger.info('Loading PaddleOCR model')
|
rapidocr_model_file = os.path.join(os.path.expanduser('~'),'.cache','rapidocr_japan_PP-OCRv4_rec_infer.onnx')
|
||||||
self.model = POCR(use_angle_cls=True, show_log=False, lang='japan')
|
if not os.path.isfile(rapidocr_model_file):
|
||||||
|
logger.info('Downloading RapidOCR model')
|
||||||
|
try:
|
||||||
|
urllib.request.urlretrieve('https://raw.githubusercontent.com/AuroraWright/owocr/master/rapidocr_japan_PP-OCRv4_rec_infer.onnx', rapidocr_model_file)
|
||||||
|
except Exception as inst:
|
||||||
|
logger.warning('Download failed. RapidOCR will not work!')
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info('Loading RapidOCR model')
|
||||||
|
self.model = ROCR(rec_model_path=rapidocr_model_file)
|
||||||
self.available = True
|
self.available = True
|
||||||
logger.info('PaddleOCR ready')
|
logger.info('RapidOCR ready')
|
||||||
|
|
||||||
def __call__(self, img_or_path):
|
def __call__(self, img_or_path):
|
||||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||||
@@ -413,15 +454,16 @@ class PaddleOCR:
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||||
|
|
||||||
|
logging.getLogger().disabled = True
|
||||||
res = ''
|
res = ''
|
||||||
read_results = self.model.ocr(self._preprocess(img), cls=True)
|
read_results, elapsed = self.model(self._preprocess(img))
|
||||||
|
if read_results:
|
||||||
for read_result in read_results:
|
for read_result in read_results:
|
||||||
if read_result:
|
res += read_result[1] + ' '
|
||||||
for text in read_result:
|
|
||||||
res += text[1][0] + ' '
|
|
||||||
|
|
||||||
x = post_process(res)
|
x = (True, post_process(res))
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
return np.array(img.convert('RGB'))
|
return np.array(img.convert('RGB'))
|
||||||
|
|
||||||
|
|||||||
11
owocr/run.py
11
owocr/run.py
@@ -93,6 +93,8 @@ class WebsocketServerThread(threading.Thread):
|
|||||||
await websocket.send('False')
|
await websocket.send('False')
|
||||||
except websockets.exceptions.ConnectionClosedOK:
|
except websockets.exceptions.ConnectionClosedOK:
|
||||||
pass
|
pass
|
||||||
|
except websockets.exceptions.ConnectionClosedError:
|
||||||
|
pass
|
||||||
finally:
|
finally:
|
||||||
self.clients.remove(websocket)
|
self.clients.remove(websocket)
|
||||||
|
|
||||||
@@ -105,7 +107,7 @@ class WebsocketServerThread(threading.Thread):
|
|||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
asyncio.set_event_loop(self.loop)
|
asyncio.set_event_loop(self.loop)
|
||||||
start_server = websockets.serve(self.server_handler, '0.0.0.0', config.get_general('websocket_port'), max_size=50000000)
|
start_server = websockets.serve(self.server_handler, '0.0.0.0', config.get_general('websocket_port'), max_size=1000000000)
|
||||||
self.server = start_server
|
self.server = start_server
|
||||||
self.loop.run_until_complete(start_server)
|
self.loop.run_until_complete(start_server)
|
||||||
self.loop.run_forever()
|
self.loop.run_forever()
|
||||||
@@ -244,10 +246,11 @@ def are_images_identical(img1, img2):
|
|||||||
|
|
||||||
def process_and_write_results(engine_instance, img_or_path, write_to):
|
def process_and_write_results(engine_instance, img_or_path, write_to):
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
text = engine_instance(img_or_path)
|
res, text = engine_instance(img_or_path)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
|
|
||||||
engine_color = config.get_general('engine_color')
|
engine_color = config.get_general('engine_color')
|
||||||
|
if res:
|
||||||
logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
|
logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
|
||||||
if config.get_general('notifications'):
|
if config.get_general('notifications'):
|
||||||
notification = Notify()
|
notification = Notify()
|
||||||
@@ -255,6 +258,8 @@ def process_and_write_results(engine_instance, img_or_path, write_to):
|
|||||||
notification.title = 'Text recognized:'
|
notification.title = 'Text recognized:'
|
||||||
notification.message = text
|
notification.message = text
|
||||||
notification.send(block=False)
|
notification.send(block=False)
|
||||||
|
else:
|
||||||
|
logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {t1 - t0:0.03f}s: {text}')
|
||||||
|
|
||||||
if write_to == 'websocket':
|
if write_to == 'websocket':
|
||||||
websocket_server_thread.send_text(text)
|
websocket_server_thread.send_text(text)
|
||||||
@@ -294,7 +299,7 @@ def run(read_from=None,
|
|||||||
:param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "screencapture", or a path to a directory.
|
:param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "screencapture", or a path to a directory.
|
||||||
:param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
|
:param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
|
||||||
:param delay_secs: How often to check for new images, in seconds.
|
:param delay_secs: How often to check for new images, in seconds.
|
||||||
:param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "azure", "winrtocr", "easyocr", "paddleocr".
|
:param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "azure", "winrtocr", "easyocr", "rapidocr".
|
||||||
:param pause_at_startup: Pause at startup.
|
:param pause_at_startup: Pause at startup.
|
||||||
:param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
|
:param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
|
||||||
:param delete_images: Delete image files after processing when reading from a directory.
|
:param delete_images: Delete image files after processing when reading from a directory.
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[general]
|
[general]
|
||||||
;engines = avision,glens,gvision,azure,mangaocr,winrtocr,easyocr,paddleocr
|
;engines = avision,glens,gvision,azure,mangaocr,winrtocr,easyocr,rapidocr
|
||||||
;engine = glens
|
;engine = glens
|
||||||
;read_from = clipboard
|
;read_from = clipboard
|
||||||
;write_to = clipboard
|
;write_to = clipboard
|
||||||
|
|||||||
BIN
rapidocr_japan_PP-OCRv4_rec_infer.onnx
Normal file
BIN
rapidocr_japan_PP-OCRv4_rec_infer.onnx
Normal file
Binary file not shown.
Reference in New Issue
Block a user