From 2e07d3da1ebb19d46ed2d2636dafc27727c59d3f Mon Sep 17 00:00:00 2001
From: AuroraWright <AuroraWright@users.noreply.github.com>
Date: Wed, 18 Dec 2024 05:15:14 +0100
Subject: [PATCH] Implement OCRSpace

---
 README.md        |  1 +
 owocr/ocr.py     | 53 ++++++++++++++++++++++++++++++++++++++++++++++++
 owocr/run.py     |  2 +-
 owocr_config.ini |  6 ++++--
 4 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 5210897..c32a505 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@ Additionally:
 - Google Lens: Google Vision in disguise (no need for API keys!), however it needs to download a couple megabytes of data for each request. You need to install pyjson5 and requests (`pip install pyjson5 requests`) ("l" key)
 - Google Vision: you need a service account .json file named google_vision.json in `user directory/.config/` and installing google-cloud-vision (`pip install google-cloud-vision`) ("g" key)
 - Azure Image Analysis: you need to specify an api key and an endpoint in the config file and to install azure-ai-vision-imageanalysis (`pip install azure-ai-vision-imageanalysis`) ("v" key)
+- OCRSpace: you need to specify an api key in the config file and to install requests (`pip install requests`) ("o" key)
 
 # Acknowledgments
 
diff --git a/owocr/ocr.py b/owocr/ocr.py
index 68bc696..ed791cb 100644
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -568,3 +568,56 @@ class RapidOCR:
 
     def _preprocess(self, img):
         return pil_image_to_numpy_array(img)
+
+class OCRSpace:
+    """OCR engine that posts images to the OCR.space HTTP API (needs requests and an API key)."""
+    name = 'ocrspace'
+    readable_name = 'OCRSpace'
+    key = 'o'
+    available = False
+
+    def __init__(self, config=None):
+        """Read 'api_key' from config; `available` is set only if requests is loaded and the key exists."""
+        if 'requests' not in sys.modules:
+            logger.warning('requests not available, OCRSpace will not work!')
+        else:
+            try:
+                self.api_key = config['api_key']
+                self.available = True
+                logger.info('OCRSpace ready')
+            except (KeyError, TypeError):  # missing key, or no config passed at all
+                logger.warning('Error reading API key from config, OCRSpace will not work!')
+
+    def __call__(self, img_or_path):
+        """Return (True, text) on success or (False, message); raise ValueError on a bad input type."""
+        if isinstance(img_or_path, (str, Path)):
+            img = Image.open(img_or_path)
+        elif isinstance(img_or_path, Image.Image):
+            img = img_or_path
+        else:
+            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+
+        data = {'apikey': self.api_key, 'language': 'jpn'}
+        files = {'file': ('image.png', self._preprocess(img), 'image/png')}
+
+        try:
+            res = requests.post('https://api.ocr.space/parse/image', data=data, files=files, timeout=20)
+        except requests.exceptions.Timeout:
+            return (False, 'Request timeout!')
+        except requests.exceptions.ConnectionError:
+            return (False, 'Connection error!')
+
+        if res.status_code != 200:
+            return (False, 'Unknown error!')
+        try:
+            res = res.json()
+            # A bare string body or a set error flag means there is no parsed text to return.
+            if isinstance(res, str) or res['IsErroredOnProcessing']:
+                return (False, 'Unknown error!')
+            return (True, res['ParsedResults'][0]['ParsedText'])
+        except (ValueError, KeyError, IndexError, TypeError):  # non-JSON body or unexpected shape
+            return (False, 'Unknown error!')
+
+    def _preprocess(self, img):
+        """Convert the image with pil_image_to_bytes; the result is uploaded as 'image.png'."""
+        return pil_image_to_bytes(img)
diff --git a/owocr/run.py b/owocr/run.py
index f2fbe0d..c23b08e 100644
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -630,7 +630,7 @@ def run(read_from=None,
     :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
     :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
     :param delay_secs: How often to check for new images, in seconds.
-    :param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "alivetext", "azure", "winrtocr", "easyocr", "rapidocr".
+    :param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "alivetext", "azure", "winrtocr", "easyocr", "rapidocr", "ocrspace".
     :param pause_at_startup: Pause at startup.
     :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
     :param delete_images: Delete image files after processing when reading from a directory.
diff --git a/owocr_config.ini b/owocr_config.ini
index 9442c35..f3b59ac 100644
--- a/owocr_config.ini
+++ b/owocr_config.ini
@@ -1,5 +1,5 @@
 [general]
-;engines = avision,alivetext,glens,gvision,azure,mangaocr,winrtocr,easyocr,rapidocr
+;engines = avision,alivetext,glens,gvision,azure,mangaocr,winrtocr,easyocr,rapidocr,ocrspace
 ;engine = glens
 ;read_from = clipboard
 ;write_to = clipboard
@@ -36,4 +36,6 @@
 pretrained_model_name_or_path = kha-white/manga-ocr-base
 force_cpu = False
 [easyocr]
-gpu = True
\ No newline at end of file
+gpu = True
+[ocrspace]
+;api_key = api_key_here
\ No newline at end of file