class OCRSpace:
    """OCR engine backed by the OCR.space HTTP API.

    Instances are callable: pass a filesystem path or a PIL image and get
    back a ``(success, text_or_error_message)`` tuple.
    """

    name = 'ocrspace'
    readable_name = 'OCRSpace'
    key = 'o'
    # Class-level default; set to True on the instance once `requests` is
    # loaded and an API key has been read from the config.
    available = False

    def __init__(self, config=None):
        """Read the API key from *config* and mark the engine available.

        :param config: mapping that provides an ``'api_key'`` entry. When it
            is missing (or *config* is ``None``) a warning is logged and the
            engine stays unavailable.
        """
        if 'requests' not in sys.modules:
            logger.warning('requests not available, OCRSpace will not work!')
            return

        try:
            self.api_key = config['api_key']
        except (KeyError, TypeError):
            # KeyError: key absent; TypeError: config is None / not subscriptable.
            logger.warning('Error reading API key from config, OCRSpace will not work!')
            return

        self.available = True
        logger.info('OCRSpace ready')

    def __call__(self, img_or_path):
        """Send an image to OCR.space and return the recognized text.

        :param img_or_path: ``str``/``Path`` pointing at an image file, or a
            ``PIL.Image.Image``.
        :returns: ``(True, text)`` on success, ``(False, message)`` on failure.
        :raises ValueError: if *img_or_path* is neither a path nor a PIL image.
        """
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        data = {
            'apikey': self.api_key,
            'language': 'jpn',
        }
        files = {'file': ('image.png', self._preprocess(img), 'image/png')}

        try:
            res = requests.post('https://api.ocr.space/parse/image', data=data, files=files, timeout=20)
        except requests.exceptions.Timeout:
            return (False, 'Request timeout!')
        except requests.exceptions.ConnectionError:
            return (False, 'Connection error!')

        if res.status_code != 200:
            return (False, 'Unknown error!')

        try:
            payload = res.json()
        except ValueError:
            # Body was not valid JSON (JSON decode errors subclass ValueError).
            return (False, 'Unknown error!')

        return self._parse_response(payload)

    @staticmethod
    def _parse_response(payload):
        """Turn a decoded OCR.space response into a ``(success, text)`` tuple.

        Anything other than a dict carrying a first parsed result is reported
        as ``(False, 'Unknown error!')`` instead of raising.
        """
        if not isinstance(payload, dict) or payload.get('IsErroredOnProcessing'):
            return (False, 'Unknown error!')

        try:
            text = payload['ParsedResults'][0]['ParsedText']
        except (KeyError, IndexError, TypeError):
            # Missing key, empty result list, or unexpected nesting.
            return (False, 'Unknown error!')

        return (True, text)

    def _preprocess(self, img):
        """Serialize *img* with the module's ``pil_image_to_bytes`` helper for upload."""
        return pil_image_to_bytes(img)
diff --git a/owocr_config.ini b/owocr_config.ini index 9442c35..f3b59ac 100644 --- a/owocr_config.ini +++ b/owocr_config.ini @@ -1,5 +1,5 @@ [general] -;engines = avision,alivetext,glens,gvision,azure,mangaocr,winrtocr,easyocr,rapidocr +;engines = avision,alivetext,glens,gvision,azure,mangaocr,winrtocr,easyocr,rapidocr,ocrspace ;engine = glens ;read_from = clipboard ;write_to = clipboard @@ -36,4 +36,6 @@ pretrained_model_name_or_path = kha-white/manga-ocr-base force_cpu = False [easyocr] -gpu = True \ No newline at end of file +gpu = True +[ocrspace] +;api_key = api_key_here \ No newline at end of file