From ef0f3a16e39c7cfa05fd9435c4432ec126ddd171 Mon Sep 17 00:00:00 2001
From: rtr46 <171138461+rtr46@users.noreply.github.com>
Date: Wed, 5 Nov 2025 12:53:04 +0100
Subject: [PATCH] add meikiocr

---
 README.md        |   1 +
 owocr/ocr.py     | 103 +++++++++++++++++++++++++++++++++++++++++++++++
 owocr_config.ini |   6 +--
 pyproject.toml   |   4 ++
 4 files changed, 111 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index e79429e..dc188ac 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,7 @@ The command-line options/config file allow you to configure OCR providers, hotke
 - Apple Live Text (VisionKit framework) - It should be the same as Vision except that in Sonoma Apple added vertical text reading. **macOS only - Recommended (pre-installed)** → key: `d`
 - WinRT OCR: install: `pip install "owocr[winocr]"`. It can also be used by installing winocr on a Windows virtual machine and running the server there (`winocr_serve`) and specifying the IP address of the Windows VM/machine in the config file. **Windows 10/11 only** → key: `w`
 - OneOCR - install: `pip install "owocr[oneocr]"`. Close second local best to the Apple one. You need to copy 3 system files from Windows 11 to use it, refer to the readme [here](https://github.com/AuroraWright/oneocr). It can also be used by installing oneocr on a Windows virtual machine and running the server there (`oneocr_serve`) and specifying the IP address of the Windows VM/machine in the config file. **Windows 10/11 only - Recommended** → key: `z`
+- [meikiocr](https://github.com/rtr46/meikiocr) - install: `pip install "owocr[meikiocr]"`. Comparable to OneOCR in accuracy and CPU latency. Can be run on NVIDIA GPUs via `pip uninstall onnxruntime && pip install onnxruntime-gpu`, making it the fastest OCR available. Probably the best option for Linux users. It cannot process vertical text and is limited to 64 text lines and 48 characters per line. → key: `k`
 
 ## Cloud
 - Google Lens - install: `pip install "owocr[lens]"`. Arguably the best OCR engine to date. **Recommended** → key: `l`
diff --git a/owocr/ocr.py b/owocr/ocr.py
index bef28f8..401cb06 100644
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -62,6 +62,11 @@ try:
 except ImportError:
     pass
 
+try:
+    from meikiocr import MeikiOCR as MKOCR
+except ImportError:
+    pass
+
 try:
     import winocr
 except ImportError:
@@ -1775,6 +1780,104 @@ class RapidOCR:
     def _preprocess(self, img):
         return pil_image_to_numpy_array(img)
 
+class MeikiOCR:
+    """Local OCR engine backed by the meikiocr package (horizontal text only)."""
+    name = 'meikiocr'
+    readable_name = 'meikiocr'
+    key = 'k'
+    config_entry = 'meikiocr'
+    available = False
+    local = True
+    manual_language = False
+    coordinate_support = True
+    threading_support = True
+
+    def __init__(self, config={}, language='ja'):
+        """Load the meikiocr model if the package was imported successfully.
+
+        ``config`` and ``language`` are accepted but not used by this engine.
+        """
+        if 'meikiocr' not in sys.modules:
+            logger.warning('meikiocr not available, meikiocr will not work!')
+        else:
+            logger.info('Loading meikiocr model')
+            self.model = MKOCR()
+            self.available = True
+            logger.info('meikiocr ready')
+
+    def _to_normalized_bbox(self, pixel_bbox: List[int], img_width: int, img_height: int) -> BoundingBox:
+        """Convert a pixel bbox ``[x1, y1, x2, y2]`` to a normalized, center-based BoundingBox."""
+        x1, y1, x2, y2 = pixel_bbox
+        width_px = x2 - x1
+        height_px = y2 - y1
+        return BoundingBox(
+            center_x=(x1 + width_px / 2) / img_width,
+            center_y=(y1 + height_px / 2) / img_height,
+            width=width_px / img_width,
+            height=height_px / img_height
+        )
+
+    def _to_generic_result(self, response: List[dict], img_width: int, img_height: int) -> OcrResult:
+        """Convert the raw meikiocr output into the standardized OcrResult format.
+
+        Each entry of ``response`` is read as one detected line, using its 'text'
+        string and its 'chars' list (items with 'char' and 'bbox' keys). Entries
+        missing either the text or the characters are skipped.
+        """
+        paragraphs = []
+
+        # each detected line becomes its own Paragraph containing a single Line.
+        for line_result in response:
+            line_text = line_result.get('text', '')
+            char_results = line_result.get('chars', [])
+            if not line_text or not char_results:
+                continue
+
+            # every recognized character is stored as a one-character Word.
+            char_in_line = [
+                Word(
+                    text=char_info['char'],
+                    bounding_box=self._to_normalized_bbox(char_info['bbox'], img_width, img_height)
+                )
+                for char_info in char_results
+            ]
+
+            line_bbox = merge_bounding_boxes(char_in_line)
+            line = Line(bounding_box=line_bbox, words=char_in_line, text=line_text)
+            paragraph = Paragraph(
+                bounding_box=line_bbox,
+                lines=[line],
+                writing_direction="LEFT_TO_RIGHT"  # meikiocr only supports horizontal text
+            )
+            paragraphs.append(paragraph)
+
+        return OcrResult(
+            image_properties=ImageProperties(width=img_width, height=img_height),
+            paragraphs=paragraphs
+        )
+
+    def __call__(self, img):
+        """Run OCR on an image.
+
+        Returns ``(True, OcrResult)`` on success, or
+        ``(False, 'Invalid image provided')`` when ``input_to_pil_image`` yields no image.
+        """
+        img, is_path = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
+
+        try:
+            # reverse the channel axis of the RGB array (RGB -> BGR) before handing it to the model.
+            image_np = np.array(img.convert('RGB'))[:, :, ::-1]
+            read_results = self.model.run_ocr(image_np)
+            ocr_result = self._to_generic_result(read_results, img.width, img.height)
+        finally:
+            # close the image when is_path is set, even if conversion or OCR raised.
+            if is_path:
+                img.close()
+
+        return (True, ocr_result)
+
 class OCRSpace:
     name = 'ocrspace'
     readable_name = 'OCRSpace'
diff --git a/owocr_config.ini b/owocr_config.ini
index b63335c..de31c1c 100644
--- a/owocr_config.ini
+++ b/owocr_config.ini
@@ -16,7 +16,7 @@
 
 ;OCR engine to use. Available: "mangaocr", "mangaocrs", "glens", "bing","gvision",
 ;"avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr",
-;"ocrspace"
+;"ocrspace", "meikiocr"
 ;engine =
 
 ;Local OCR engine to use for two-pass screen capture processing.
@@ -33,8 +33,8 @@
 ;delete_images = False
 
 ;Restrict engines to load. Available: avision,alivetext,bing,glens,gvision,azure,
-;mangaocr,mangaocrs,winrtocr,oneocr, easyocr,rapidocr,ocrspace
-;engines = avision,alivetext,bing,glens,gvision,azure,mangaocr,mangaocrs,winrtocr,oneocr,easyocr,rapidocr,ocrspace
+;mangaocr,mangaocrs,winrtocr,oneocr,easyocr,rapidocr,ocrspace,meikiocr
+;engines = avision,alivetext,bing,glens,gvision,azure,mangaocr,mangaocrs,winrtocr,oneocr,easyocr,rapidocr,ocrspace,meikiocr
 
 ;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
 
diff --git a/pyproject.toml b/pyproject.toml
index d6a1f0c..b672c6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,6 +45,10 @@ rapidocr = [
     "onnxruntime"
 ]
 
+meikiocr = [
+    "meikiocr"
+]
+
 mangaocr = [
     "manga-ocr",
     "setuptools<80",