Implement OCRSpace
This commit is contained in:
@@ -40,6 +40,7 @@ Additionally:
|
|||||||
- Google Lens: Google Vision in disguise (no need for API keys!), however it needs to download a couple megabytes of data for each request. You need to install pyjson5 and requests (`pip install pyjson5 requests`) ("l" key)
|
- Google Lens: Google Vision in disguise (no need for API keys!), however it needs to download a couple megabytes of data for each request. You need to install pyjson5 and requests (`pip install pyjson5 requests`) ("l" key)
|
||||||
- Google Vision: you need a service account .json file named google_vision.json in `user directory/.config/` and installing google-cloud-vision (`pip install google-cloud-vision`) ("g" key)
|
- Google Vision: you need a service account .json file named google_vision.json in `user directory/.config/` and installing google-cloud-vision (`pip install google-cloud-vision`) ("g" key)
|
||||||
- Azure Image Analysis: you need to specify an api key and an endpoint in the config file and to install azure-ai-vision-imageanalysis (`pip install azure-ai-vision-imageanalysis`) ("v" key)
|
- Azure Image Analysis: you need to specify an api key and an endpoint in the config file and to install azure-ai-vision-imageanalysis (`pip install azure-ai-vision-imageanalysis`) ("v" key)
|
||||||
|
- OCRSpace: you need to specify an API key in the config file and to install requests (`pip install requests`) ("o" key)
|
||||||
|
|
||||||
# Acknowledgments
|
# Acknowledgments
|
||||||
|
|
||||||
|
|||||||
53
owocr/ocr.py
53
owocr/ocr.py
@@ -568,3 +568,56 @@ class RapidOCR:
|
|||||||
|
|
||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
return pil_image_to_numpy_array(img)
|
return pil_image_to_numpy_array(img)
|
||||||
|
|
||||||
|
class OCRSpace:
    """OCR engine that sends images to the OCR.space HTTP API.

    Class attributes:
        name: internal engine identifier.
        readable_name: engine name used in log messages.
        key: single-character key associated with this engine.
        available: ``False`` until ``__init__`` has found both the
            ``requests`` module and an API key in the config.
    """

    name = 'ocrspace'
    readable_name = 'OCRSpace'
    key = 'o'
    available = False

    def __init__(self, config=None):
        """Read the API key from *config* and mark the engine as available.

        Args:
            config: mapping that must contain an ``'api_key'`` entry.
                ``None`` (the default) is treated as an empty mapping.

        The constructor never raises for a missing dependency or a missing
        key; it logs a warning and leaves ``available`` set to ``False``.
        """
        # A ``None`` default avoids sharing one dict between all instances.
        config = {} if config is None else config

        if 'requests' not in sys.modules:
            logger.warning('requests not available, OCRSpace will not work!')
            return

        try:
            self.api_key = config['api_key']
        except (KeyError, TypeError):
            # KeyError: key absent; TypeError: config is not subscriptable.
            logger.warning('Error reading API key from config, OCRSpace will not work!')
        else:
            self.available = True
            logger.info('OCRSpace ready')

    def __call__(self, img_or_path):
        """Run OCR on an image and return ``(success, text_or_error)``.

        Args:
            img_or_path: a ``str``/``Path`` pointing at an image file, or an
                already loaded ``PIL.Image.Image``.

        Returns:
            ``(True, parsed_text)`` on success, otherwise
            ``(False, error_message)``.

        Raises:
            ValueError: if *img_or_path* is neither a path nor a PIL image.
        """
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        data = {
            'apikey': self.api_key,
            'language': 'jpn',
        }
        files = {'file': ('image.png', self._preprocess(img), 'image/png')}

        try:
            res = requests.post('https://api.ocr.space/parse/image', data=data, files=files, timeout=20)
        except requests.exceptions.Timeout:
            return (False, 'Request timeout!')
        except requests.exceptions.ConnectionError:
            return (False, 'Connection error!')
        except requests.exceptions.RequestException:
            # Any other transport-level failure is reported, not propagated,
            # so callers always get the (success, message) tuple.
            return (False, 'Unknown error!')

        if res.status_code != 200:
            return (False, 'Unknown error!')

        try:
            payload = res.json()
        except ValueError:
            # A 200 response whose body is not valid JSON.
            return (False, 'Unknown error!')

        return self._parse_response(payload)

    @staticmethod
    def _parse_response(payload):
        """Turn a decoded API response into ``(success, text_or_error)``.

        A payload that is not a mapping (the original code guarded against
        a bare string), one flagged with ``IsErroredOnProcessing``, or one
        without a first ``ParsedResults`` entry is reported as an error.
        """
        if not isinstance(payload, dict) or payload.get('IsErroredOnProcessing'):
            return (False, 'Unknown error!')

        try:
            text = payload['ParsedResults'][0]['ParsedText']
        except (KeyError, IndexError, TypeError):
            return (False, 'Unknown error!')

        return (True, text)

    def _preprocess(self, img):
        """Serialize the PIL image to bytes for the multipart upload."""
        return pil_image_to_bytes(img)
|
||||||
|
|||||||
@@ -630,7 +630,7 @@ def run(read_from=None,
|
|||||||
:param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
|
:param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
|
||||||
:param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
|
:param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
|
||||||
:param delay_secs: How often to check for new images, in seconds.
|
:param delay_secs: How often to check for new images, in seconds.
|
||||||
:param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "alivetext", "azure", "winrtocr", "easyocr", "rapidocr".
|
:param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "alivetext", "azure", "winrtocr", "easyocr", "rapidocr", "ocrspace".
|
||||||
:param pause_at_startup: Pause at startup.
|
:param pause_at_startup: Pause at startup.
|
||||||
:param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
|
:param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
|
||||||
:param delete_images: Delete image files after processing when reading from a directory.
|
:param delete_images: Delete image files after processing when reading from a directory.
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[general]
|
[general]
|
||||||
;engines = avision,alivetext,glens,gvision,azure,mangaocr,winrtocr,easyocr,rapidocr
|
;engines = avision,alivetext,glens,gvision,azure,mangaocr,winrtocr,easyocr,rapidocr,ocrspace
|
||||||
;engine = glens
|
;engine = glens
|
||||||
;read_from = clipboard
|
;read_from = clipboard
|
||||||
;write_to = clipboard
|
;write_to = clipboard
|
||||||
@@ -37,3 +37,5 @@ pretrained_model_name_or_path = kha-white/manga-ocr-base
|
|||||||
force_cpu = False
|
force_cpu = False
|
||||||
[easyocr]
|
[easyocr]
|
||||||
gpu = True
|
gpu = True
|
||||||
|
[ocrspace]
|
||||||
|
;api_key = api_key_here
|
||||||
Reference in New Issue
Block a user