add meikiocr
This commit is contained in:
@@ -61,6 +61,7 @@ The command-line options/config file allow you to configure OCR providers, hotke
|
|||||||
- Apple Live Text (VisionKit framework) - It should be the same as Vision except that in Sonoma Apple added vertical text reading. **macOS only - Recommended (pre-installed)** → key: `d`
|
- Apple Live Text (VisionKit framework) - It should be the same as Vision except that in Sonoma Apple added vertical text reading. **macOS only - Recommended (pre-installed)** → key: `d`
|
||||||
- WinRT OCR: install: `pip install "owocr[winocr]"`. It can also be used by installing winocr on a Windows virtual machine and running the server there (`winocr_serve`) and specifying the IP address of the Windows VM/machine in the config file. **Windows 10/11 only** → key: `w`
|
- WinRT OCR: install: `pip install "owocr[winocr]"`. It can also be used by installing winocr on a Windows virtual machine and running the server there (`winocr_serve`) and specifying the IP address of the Windows VM/machine in the config file. **Windows 10/11 only** → key: `w`
|
||||||
- OneOCR - install: `pip install "owocr[oneocr]"`. Close second local best to the Apple one. You need to copy 3 system files from Windows 11 to use it, refer to the readme [here](https://github.com/AuroraWright/oneocr). It can also be used by installing oneocr on a Windows virtual machine and running the server there (`oneocr_serve`) and specifying the IP address of the Windows VM/machine in the config file. **Windows 10/11 only - Recommended** → key: `z`
|
- OneOCR - install: `pip install "owocr[oneocr]"`. Close second local best to the Apple one. You need to copy 3 system files from Windows 11 to use it, refer to the readme [here](https://github.com/AuroraWright/oneocr). It can also be used by installing oneocr on a Windows virtual machine and running the server there (`oneocr_serve`) and specifying the IP address of the Windows VM/machine in the config file. **Windows 10/11 only - Recommended** → key: `z`
|
||||||
|
- [meikiocr](https://github.com/rtr46/meikiocr) - install: `pip install "owocr[meikiocr]"`. Comparable to OneOCR in accuracy and CPU latency. It can also run on Nvidia GPUs (`pip uninstall onnxruntime && pip install onnxruntime-gpu`), which makes it the fastest OCR available. Probably the best option for Linux users. It can't process vertical text and is limited to 64 text lines and 48 characters per line. → key: `k`
|
||||||
|
|
||||||
## Cloud
|
## Cloud
|
||||||
- Google Lens - install: `pip install "owocr[lens]"`. Arguably the best OCR engine to date. **Recommended** → key: `l`
|
- Google Lens - install: `pip install "owocr[lens]"`. Arguably the best OCR engine to date. **Recommended** → key: `l`
|
||||||
|
|||||||
103
owocr/ocr.py
103
owocr/ocr.py
@@ -62,6 +62,11 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
from meikiocr import MeikiOCR as MKOCR
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import winocr
|
import winocr
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@@ -1775,6 +1780,104 @@ class RapidOCR:
|
|||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
return pil_image_to_numpy_array(img)
|
return pil_image_to_numpy_array(img)
|
||||||
|
|
||||||
|
class MeikiOCR:
    """OCR engine wrapper around the optional ``meikiocr`` package.

    The engine is usable only when the ``meikiocr`` module has been imported
    successfully (checked through ``sys.modules``); otherwise ``available``
    stays ``False`` and no model is loaded. Every detected text line is
    reported as its own paragraph, with one ``Word`` per recognised character.
    """

    name = 'meikiocr'
    readable_name = 'meikiocr'
    key = 'k'
    config_entry = 'meikiocr'
    available = False
    local = True
    manual_language = False
    coordinate_support = True
    threading_support = True

    def __init__(self, config=None, language='ja'):
        """Load the meikiocr model if the package is importable.

        Args:
            config: Engine configuration. Not read by this engine; ``None``
                replaces the former shared mutable ``{}`` default.
            language: Language code. Not read by this engine.
        """
        if 'meikiocr' not in sys.modules:
            logger.warning('meikiocr not available, meikiocr will not work!')
        else:
            logger.info('Loading meikiocr model')
            self.model = MKOCR()
            self.available = True
            logger.info('meikiocr ready')

    def _to_normalized_bbox(self, pixel_bbox: List[int], img_width: int, img_height: int) -> BoundingBox:
        """Convert a pixel bbox ``[x1, y1, x2, y2]`` to a normalized BoundingBox.

        The returned box is expressed as centre point plus size, each divided
        by the corresponding image dimension.
        """
        x1, y1, x2, y2 = pixel_bbox
        width_px = x2 - x1
        height_px = y2 - y1
        center_x_px = x1 + width_px / 2
        center_y_px = y1 + height_px / 2

        return BoundingBox(
            center_x=center_x_px / img_width,
            center_y=center_y_px / img_height,
            width=width_px / img_width,
            height=height_px / img_height
        )

    def _to_generic_result(self, response: List[dict], img_width: int, img_height: int) -> OcrResult:
        """Convert the raw meikiocr output into the standardized OcrResult format.

        Args:
            response: One dict per detected line; the ``'text'`` and
                ``'chars'`` keys are read, and each char entry must provide
                ``'char'`` and ``'bbox'``.
            img_width: Width of the source image in pixels.
            img_height: Height of the source image in pixels.

        Returns:
            An ``OcrResult`` holding one single-line paragraph per detected
            line. Lines with empty text or no characters are dropped.
        """
        paragraphs = []

        # each dictionary in the response corresponds to a detected line of text.
        # treat each line as a separate Paragraph containing a single Line.
        for line_result in response:
            line_text = line_result.get('text', '')
            char_results = line_result.get('chars', [])
            if not line_text or not char_results:
                continue

            # char_results is non-empty here, so the list below is never
            # empty and needs no second emptiness check.
            chars_in_line = [
                Word(
                    text=char_info['char'],
                    bounding_box=self._to_normalized_bbox(
                        char_info['bbox'], img_width, img_height
                    )
                )
                for char_info in char_results
            ]

            line_bbox = merge_bounding_boxes(chars_in_line)

            line = Line(
                bounding_box=line_bbox,
                words=chars_in_line,
                text=line_text
            )

            # each line becomes a paragraph.
            paragraphs.append(
                Paragraph(
                    bounding_box=line_bbox,
                    lines=[line],
                    writing_direction="LEFT_TO_RIGHT"  # meikiocr only supports horizontal text
                )
            )

        return OcrResult(
            image_properties=ImageProperties(width=img_width, height=img_height),
            paragraphs=paragraphs
        )

    def __call__(self, img):
        """Run OCR on ``img`` and return a ``(success, payload)`` tuple.

        Returns:
            ``(False, 'Invalid image provided')`` when the input cannot be
            turned into an image, otherwise ``(True, OcrResult)``.
        """
        img, is_path = input_to_pil_image(img)
        if not img:
            return (False, 'Invalid image provided')

        try:
            # Reverse the channel axis of the RGB array (RGB -> BGR);
            # presumably the channel order meikiocr expects - TODO confirm.
            image_np = np.array(img.convert('RGB'))[:, :, ::-1]

            read_results = self.model.run_ocr(image_np)
            ocr_result = self._to_generic_result(read_results, img.width, img.height)
        finally:
            # Close the image even if conversion or OCR raises, so it is
            # not leaked on the error path. Only done when is_path is set
            # (presumably meaning the image was opened from a path here).
            if is_path:
                img.close()

        return (True, ocr_result)
|
||||||
|
|
||||||
class OCRSpace:
|
class OCRSpace:
|
||||||
name = 'ocrspace'
|
name = 'ocrspace'
|
||||||
readable_name = 'OCRSpace'
|
readable_name = 'OCRSpace'
|
||||||
|
|||||||
@@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
;OCR engine to use. Available: "mangaocr", "mangaocrs", "glens", "bing","gvision",
|
;OCR engine to use. Available: "mangaocr", "mangaocrs", "glens", "bing","gvision",
|
||||||
;"avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr",
|
;"avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr",
|
||||||
;"ocrspace"
|
;"ocrspace", "meikiocr"
|
||||||
;engine =
|
;engine =
|
||||||
|
|
||||||
;Local OCR engine to use for two-pass screen capture processing.
|
;Local OCR engine to use for two-pass screen capture processing.
|
||||||
@@ -33,8 +33,8 @@
|
|||||||
;delete_images = False
|
;delete_images = False
|
||||||
|
|
||||||
;Restrict engines to load. Available: avision,alivetext,bing,glens,gvision,azure,
|
;Restrict engines to load. Available: avision,alivetext,bing,glens,gvision,azure,
|
||||||
;mangaocr,mangaocrs,winrtocr,oneocr, easyocr,rapidocr,ocrspace
|
;mangaocr,mangaocrs,winrtocr,oneocr,easyocr,rapidocr,ocrspace,meikiocr
|
||||||
;engines = avision,alivetext,bing,glens,gvision,azure,mangaocr,mangaocrs,winrtocr,oneocr,easyocr,rapidocr,ocrspace
|
;engines = avision,alivetext,bing,glens,gvision,azure,mangaocr,mangaocrs,winrtocr,oneocr,easyocr,rapidocr,ocrspace,meikiocr
|
||||||
|
|
||||||
;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
|
;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
|
||||||
|
|
||||||
|
|||||||
@@ -45,6 +45,10 @@ rapidocr = [
|
|||||||
"onnxruntime"
|
"onnxruntime"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
meikiocr = [
|
||||||
|
"meikiocr"
|
||||||
|
]
|
||||||
|
|
||||||
mangaocr = [
|
mangaocr = [
|
||||||
"manga-ocr",
|
"manga-ocr",
|
||||||
"setuptools<80",
|
"setuptools<80",
|
||||||
|
|||||||
Reference in New Issue
Block a user