diff --git a/README.md b/README.md
index 0b26ea0..e2cea3f 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,7 @@ Additionally:
 - Apple Vision framework: this will work on macOS Ventura or later. In my experience, the best of the local providers for horizontal text ("a" key)
 - Apple Live Text (VisionKit framework): this will work on macOS Ventura or later. It should be the same as Vision except that in Sonoma Apple added vertical text reading ("d" key)
 - WinRT OCR: install with `pip install owocr[winocr]` on Windows 10 and later. It can also be used by installing winocr on a Windows virtual machine and running the server there (`winocr_serve`) and specifying the IP address of the Windows VM/machine in the config file ("w" key)
+- OneOCR: install with `pip install owocr[oneocr]` on Windows 10 and later. In my experience it's pretty good, though not as good as the Apple one. You need to copy 3 system files from Windows 11 to use it, refer to the readme [here](https://github.com/AuroraWright/oneocr) ("z" key)
 
 ## Cloud providers
 - Google Lens: Google Vision in disguise (no need for API keys!), install with `pip install owocr[lens]` ("l" key)
diff --git a/owocr/ocr.py b/owocr/ocr.py
index 3e78db2..6af6abd 100644
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -60,6 +60,11 @@ try:
 except ImportError:
     pass
 
+try:
+    import oneocr
+except ImportError:
+    pass
+
 try:
     import pyjson5
 except ImportError:
@@ -710,6 +715,80 @@ class WinRTOCR:
     def _preprocess(self, img):
         return pil_image_to_bytes(img, png_compression=1)
 
+class OneOCR:
+    """OCR engine backed by OneOCR.
+
+    On Windows the ``oneocr`` module is used in-process; on any other platform
+    images are sent with an HTTP POST to the URL read from ``config['url']``.
+    """
+    name = 'oneocr'
+    readable_name = 'OneOCR'
+    key = 'z'
+    available = False
+
+    def __init__(self, config=None):
+        """Set up the local engine (Windows) or the server URL (elsewhere).
+
+        :param config: Mapping holding the engine settings; only ``url`` is
+            read, and only on non-Windows platforms.
+        """
+        if sys.platform == 'win32':
+            # platform.release() can return non-integer strings such as '8.1'
+            # or 'Vista', so check the numeric major version instead.
+            if sys.getwindowsversion().major < 10:
+                logger.warning('OneOCR is not supported on Windows older than 10!')
+            elif 'oneocr' not in sys.modules:
+                logger.warning('oneocr not available, OneOCR will not work!')
+            else:
+                self.model = oneocr.OcrEngine()
+                self.available = True
+                logger.info('OneOCR ready')
+        else:
+            try:
+                self.url = config['url']
+            except (KeyError, TypeError):
+                # TypeError covers config=None, KeyError a missing "url" entry.
+                logger.warning('Error reading URL from config, OneOCR will not work!')
+            else:
+                self.available = True
+                logger.info('OneOCR ready')
+
+    def __call__(self, img_or_path):
+        """Recognize the text in an image.
+
+        :param img_or_path: A PIL image, or a ``str``/``Path`` to open as one.
+        :return: ``(True, text)`` on success, or ``(False, error_message)``
+            when the HTTP request fails.
+        :raises ValueError: If ``img_or_path`` is neither a path nor a PIL image.
+        """
+        if isinstance(img_or_path, (str, Path)):
+            img = Image.open(img_or_path)
+        elif isinstance(img_or_path, Image.Image):
+            img = img_or_path
+        else:
+            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+
+        if sys.platform == 'win32':
+            res = self.model.recognize_pil(img)['text']
+        else:
+            try:
+                res = requests.post(self.url, data=self._preprocess(img), timeout=3)
+            except requests.exceptions.Timeout:
+                return (False, 'Request timeout!')
+            except requests.exceptions.ConnectionError:
+                return (False, 'Connection error!')
+
+            if res.status_code != 200:
+                return (False, 'Unknown error!')
+
+            res = res.json()['text']
+
+        return (True, res)
+
+    def _preprocess(self, img):
+        """Convert the image to bytes with ``pil_image_to_bytes`` for the request body."""
+        return pil_image_to_bytes(img, png_compression=1)
+
 class AzureImageAnalysis:
     name = 'azure'
     readable_name = 'Azure Image Analysis'
diff --git a/owocr/run.py b/owocr/run.py
index 745cd8d..f7bff31 100644
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -633,7 +633,7 @@ def run(read_from=None,
     :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.
     :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
     :param delay_secs: How often to check for new images, in seconds.
-    :param engine: OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "easyocr", "rapidocr", "ocrspace".
+    :param engine: OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".
     :param pause_at_startup: Pause at startup.
     :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
     :param delete_images: Delete image files after processing when reading from a directory.
diff --git a/owocr_config.ini b/owocr_config.ini
index 27ca4ec..61d3adc 100644
--- a/owocr_config.ini
+++ b/owocr_config.ini
@@ -1,5 +1,5 @@
 [general]
-;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,easyocr,rapidocr,ocrspace
+;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
 ;engine = glens
 ;read_from = clipboard
 ;write_to = clipboard
@@ -29,6 +29,8 @@
 ;screen_capture_combo = ++s
 [winrtocr]
 ;url = http://aaa.xxx.yyy.zzz:8000
+[oneocr]
+;url = http://aaa.xxx.yyy.zzz:8000
 [azure]
 ;api_key = api_key_here
 ;endpoint = https://YOURPROJECT.cognitiveservices.azure.com/
diff --git a/pyproject.toml b/pyproject.toml
index 1f14419..7868634 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "owocr"
-version = "1.12.2"
+version = "1.13"
 description = "Japanese OCR"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -32,6 +32,9 @@ dependencies = [
     "requests",
     "pywinctl;platform_system=='Linux'",
     "winrt-windows.applicationmodel.core==2.3.0;platform_system=='Windows'",
+    "winrt-windows-globalization==2.3.0;platform_system=='Windows'",
+    "winrt-Windows.Media.Ocr==2.3.0;platform_system=='Windows'",
+    "winrt-Windows.Storage.Streams==2.3.0;platform_system=='Windows'",
     "pywin32;platform_system=='Windows'",
     "pyobjc;platform_system=='Darwin'"
 ]
@@ -58,10 +61,14 @@ mangaocr = [
     "manga-ocr",
 ]
 
-winrt = [
+winocr = [
     "winocr",
 ]
 
+oneocr = [
+    "oneocr",
+]
+
 lens = [
     "betterproto==2.0.0b7",
 ]