Have segmented and non-segmented manga OCR as separate engines (comic text detector doesn't work well on small text areas)
This commit is contained in:
@@ -46,7 +46,7 @@ The command-line options/config file allow you to configure OCR providers, hotke
|
|||||||
# Supported engines
|
# Supported engines
|
||||||
|
|
||||||
## Local
|
## Local
|
||||||
- [Manga OCR](https://github.com/kha-white/manga-ocr) (with [comic-text-detector](https://github.com/dmMaze/comic-text-detector) as segmenter) - install: `pip install owocr[mangaocr]` → key: `m`
|
- [Manga OCR](https://github.com/kha-white/manga-ocr) (with optional [comic-text-detector](https://github.com/dmMaze/comic-text-detector) as segmenter) - install: `pip install owocr[mangaocr]` → keys: `m` (regular, ideal for small text areas), `n` (segmented, ideal for manga panels/larger images with multiple text areas)
|
||||||
- [EasyOCR](https://github.com/JaidedAI/EasyOCR) - install: `pip install owocr[easyocr]` → key: `e`
|
- [EasyOCR](https://github.com/JaidedAI/EasyOCR) - install: `pip install owocr[easyocr]` → key: `e`
|
||||||
- [RapidOCR](https://github.com/RapidAI/RapidOCR) - install: `pip install owocr[rapidocr]` → key: `r`
|
- [RapidOCR](https://github.com/RapidAI/RapidOCR) - install: `pip install owocr[rapidocr]` → key: `r`
|
||||||
- Apple Vision framework - Probably the best local engine to date. **macOS only - Recommended (pre-installed)** → key: `a`
|
- Apple Vision framework - Probably the best local engine to date. **macOS only - Recommended (pre-installed)** → key: `a`
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.S
|
|||||||
parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
|
parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
|
||||||
help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
|
help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
|
||||||
parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
|
parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
|
||||||
help='OCR engine to use. Available: "mangaocr", "glens", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".')
|
help='OCR engine to use. Available: "mangaocr", "mangaocrs", "glens", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".')
|
||||||
parser.add_argument('-es', '--engine_secondary', type=str, default=argparse.SUPPRESS,
|
parser.add_argument('-es', '--engine_secondary', type=str, default=argparse.SUPPRESS,
|
||||||
help='Local OCR engine to use for two-pass screen capture processing.')
|
help='Local OCR engine to use for two-pass screen capture processing.')
|
||||||
parser.add_argument('-p', '--pause_at_startup', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
|
parser.add_argument('-p', '--pause_at_startup', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
|
||||||
|
|||||||
75
owocr/ocr.py
75
owocr/ocr.py
@@ -85,6 +85,8 @@ try:
|
|||||||
except:
|
except:
|
||||||
optimized_png_encode = False
|
optimized_png_encode = False
|
||||||
|
|
||||||
|
manga_ocr_model = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BoundingBox:
|
class BoundingBox:
|
||||||
@@ -132,8 +134,18 @@ class OcrResult:
|
|||||||
paragraphs: List[Paragraph] = field(default_factory=list)
|
paragraphs: List[Paragraph] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
def empty_post_process(text):
|
def initialize_manga_ocr(pretrained_model_name_or_path, force_cpu):
|
||||||
return text
|
def empty_post_process(text):
|
||||||
|
return text
|
||||||
|
|
||||||
|
global manga_ocr_model
|
||||||
|
if not manga_ocr_model:
|
||||||
|
logger.disable('manga_ocr')
|
||||||
|
logging.getLogger('transformers').setLevel(logging.ERROR) # silence transformers >=4.46 warnings
|
||||||
|
from manga_ocr import ocr
|
||||||
|
ocr.post_process = empty_post_process
|
||||||
|
logger.info(f'Loading Manga OCR model')
|
||||||
|
manga_ocr_model = MOCR(pretrained_model_name_or_path, force_cpu)
|
||||||
|
|
||||||
def input_to_pil_image(img):
|
def input_to_pil_image(img):
|
||||||
is_path = False
|
is_path = False
|
||||||
@@ -243,7 +255,7 @@ def merge_bounding_boxes(ocr_element_list, rotated=False):
|
|||||||
cos_a, sin_a = np.cos(angle), np.sin(angle)
|
cos_a, sin_a = np.cos(angle), np.sin(angle)
|
||||||
rot = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
|
rot = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
|
||||||
corners.append(local @ rot.T + [cx, cy])
|
corners.append(local @ rot.T + [cx, cy])
|
||||||
|
|
||||||
return np.vstack(corners) if corners else np.empty((0, 2))
|
return np.vstack(corners) if corners else np.empty((0, 2))
|
||||||
|
|
||||||
def _convex_hull(points):
|
def _convex_hull(points):
|
||||||
@@ -364,10 +376,10 @@ def merge_bounding_boxes(ocr_element_list, rotated=False):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MangaOcr:
|
class MangaOcrSegmented:
|
||||||
name = 'mangaocr'
|
name = 'mangaocrs'
|
||||||
readable_name = 'Manga OCR'
|
readable_name = 'Manga OCR (segmented)'
|
||||||
key = 'm'
|
key = 'n'
|
||||||
available = False
|
available = False
|
||||||
local = True
|
local = True
|
||||||
manual_language = False
|
manual_language = False
|
||||||
@@ -376,9 +388,9 @@ class MangaOcr:
|
|||||||
|
|
||||||
def __init__(self, config={}):
|
def __init__(self, config={}):
|
||||||
if 'manga_ocr' not in sys.modules:
|
if 'manga_ocr' not in sys.modules:
|
||||||
logger.warning('manga-ocr not available, Manga OCR will not work!')
|
logger.warning('manga-ocr not available, Manga OCR (segmented) will not work!')
|
||||||
elif 'scipy' not in sys.modules:
|
elif 'scipy' not in sys.modules:
|
||||||
logger.warning('scipy not available, Manga OCR will not work!')
|
logger.warning('scipy not available, Manga OCR (segmented) will not work!')
|
||||||
else:
|
else:
|
||||||
comic_text_detector_path = Path.home() / ".cache" / "manga-ocr"
|
comic_text_detector_path = Path.home() / ".cache" / "manga-ocr"
|
||||||
comic_text_detector_file = comic_text_detector_path / "comictextdetector.pt"
|
comic_text_detector_file = comic_text_detector_path / "comictextdetector.pt"
|
||||||
@@ -389,18 +401,12 @@ class MangaOcr:
|
|||||||
try:
|
try:
|
||||||
urllib.request.urlretrieve('https://github.com/zyddnys/manga-image-translator/releases/download/beta-0.3/comictextdetector.pt', str(comic_text_detector_file))
|
urllib.request.urlretrieve('https://github.com/zyddnys/manga-image-translator/releases/download/beta-0.3/comictextdetector.pt', str(comic_text_detector_file))
|
||||||
except:
|
except:
|
||||||
logger.warning('Download failed. Manga OCR will not work!')
|
logger.warning('Download failed. Manga OCR (segmented) will not work!')
|
||||||
return
|
return
|
||||||
|
|
||||||
pretrained_model_name_or_path = config.get('pretrained_model_name_or_path', 'kha-white/manga-ocr-base')
|
pretrained_model_name_or_path = config.get('pretrained_model_name_or_path', 'kha-white/manga-ocr-base')
|
||||||
force_cpu = config.get('force_cpu', False)
|
force_cpu = config.get('force_cpu', False)
|
||||||
|
initialize_manga_ocr(pretrained_model_name_or_path, force_cpu)
|
||||||
logger.disable('manga_ocr')
|
|
||||||
logging.getLogger('transformers').setLevel(logging.ERROR) # silence transformers >=4.46 warnings
|
|
||||||
from manga_ocr import ocr
|
|
||||||
ocr.post_process = empty_post_process
|
|
||||||
logger.info(f'Loading Manga OCR model')
|
|
||||||
self.model = MOCR(pretrained_model_name_or_path, force_cpu)
|
|
||||||
|
|
||||||
if not force_cpu and torch.cuda.is_available():
|
if not force_cpu and torch.cuda.is_available():
|
||||||
device = 'cuda'
|
device = 'cuda'
|
||||||
@@ -412,7 +418,7 @@ class MangaOcr:
|
|||||||
self.text_detector_model = TextDetector(model_path=comic_text_detector_file, input_size=1024, device=device, act='leaky')
|
self.text_detector_model = TextDetector(model_path=comic_text_detector_file, input_size=1024, device=device, act='leaky')
|
||||||
|
|
||||||
self.available = True
|
self.available = True
|
||||||
logger.info('Manga OCR ready')
|
logger.info('Manga OCR (segmented) ready')
|
||||||
|
|
||||||
def _convert_line_bbox(self, rect, img_width, img_height):
|
def _convert_line_bbox(self, rect, img_width, img_height):
|
||||||
x1, y1 = float(rect[0][0]), float(rect[0][1])
|
x1, y1 = float(rect[0][0]), float(rect[0][1])
|
||||||
@@ -505,7 +511,7 @@ class MangaOcr:
|
|||||||
for line_crop in line_crops:
|
for line_crop in line_crops:
|
||||||
if blk.vertical:
|
if blk.vertical:
|
||||||
line_crop = cv2.rotate(line_crop, cv2.ROTATE_90_CLOCKWISE)
|
line_crop = cv2.rotate(line_crop, cv2.ROTATE_90_CLOCKWISE)
|
||||||
l_text += self.model(Image.fromarray(line_crop))
|
l_text += manga_ocr_model(Image.fromarray(line_crop))
|
||||||
l_bbox = self._convert_line_bbox(line.tolist(), img_width, img_height)
|
l_bbox = self._convert_line_bbox(line.tolist(), img_width, img_height)
|
||||||
|
|
||||||
word = Word(
|
word = Word(
|
||||||
@@ -549,6 +555,37 @@ class MangaOcr:
|
|||||||
img.close()
|
img.close()
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
class MangaOcr:
|
||||||
|
name = 'mangaocr'
|
||||||
|
readable_name = 'Manga OCR'
|
||||||
|
key = 'm'
|
||||||
|
available = False
|
||||||
|
local = True
|
||||||
|
manual_language = False
|
||||||
|
coordinate_support = False
|
||||||
|
threading_support = True
|
||||||
|
|
||||||
|
def __init__(self, config={}):
|
||||||
|
if 'manga_ocr' not in sys.modules:
|
||||||
|
logger.warning('manga-ocr not available, Manga OCR will not work!')
|
||||||
|
else:
|
||||||
|
pretrained_model_name_or_path = config.get('pretrained_model_name_or_path', 'kha-white/manga-ocr-base')
|
||||||
|
force_cpu = config.get('force_cpu', False)
|
||||||
|
initialize_manga_ocr(pretrained_model_name_or_path, force_cpu)
|
||||||
|
self.available = True
|
||||||
|
logger.info('Manga OCR ready')
|
||||||
|
|
||||||
|
def __call__(self, img):
|
||||||
|
img, is_path = input_to_pil_image(img)
|
||||||
|
if not img:
|
||||||
|
return (False, 'Invalid image provided')
|
||||||
|
|
||||||
|
x = (True, [manga_ocr_model(img)])
|
||||||
|
|
||||||
|
if is_path:
|
||||||
|
img.close()
|
||||||
|
return x
|
||||||
|
|
||||||
class GoogleVision:
|
class GoogleVision:
|
||||||
name = 'gvision'
|
name = 'gvision'
|
||||||
readable_name = 'Google Vision'
|
readable_name = 'Google Vision'
|
||||||
|
|||||||
@@ -14,9 +14,10 @@
|
|||||||
;a path to a text file.
|
;a path to a text file.
|
||||||
;write_to = clipboard
|
;write_to = clipboard
|
||||||
|
|
||||||
;OCR engine to use. Available: "mangaocr", "glens", "bing","gvision", "avision",
|
;OCR engine to use. Available: "mangaocr", "mangaocrs", "glens", "bing", "gvision",
|
||||||
;"alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".
|
;"avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr",
|
||||||
;engine =
|
;"ocrspace".
|
||||||
|
;engine =
|
||||||
|
|
||||||
;OCR engine to use for two-pass processing.
|
;OCR engine to use for two-pass processing.
|
||||||
;engine_secondary =
|
;engine_secondary =
|
||||||
@@ -31,9 +32,9 @@
|
|||||||
;Delete image files after processing when reading from a directory.
|
;Delete image files after processing when reading from a directory.
|
||||||
;delete_images = False
|
;delete_images = False
|
||||||
|
|
||||||
;Restricts engines to load. Available:
|
;Restrict engines to load. Available: avision,alivetext,bing,glens,gvision,azure,
|
||||||
;avision,alivetext,bing,glens,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
|
;mangaocr,mangaocrs,winrtocr,oneocr,easyocr,rapidocr,ocrspace
|
||||||
;engines = avision,alivetext,bing,glens,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
|
;engines = avision,alivetext,bing,glens,gvision,azure,mangaocr,mangaocrs,winrtocr,oneocr,easyocr,rapidocr,ocrspace
|
||||||
|
|
||||||
;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
|
;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
|
||||||
|
|
||||||
@@ -146,6 +147,11 @@
|
|||||||
|
|
||||||
;force_cpu = False
|
;force_cpu = False
|
||||||
|
|
||||||
|
;[mangaocrs]
|
||||||
|
;pretrained_model_name_or_path = kha-white/manga-ocr-base
|
||||||
|
|
||||||
|
;force_cpu = False
|
||||||
|
|
||||||
;[easyocr]
|
;[easyocr]
|
||||||
;gpu = True
|
;gpu = True
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "owocr"
|
name = "owocr"
|
||||||
version = "1.19"
|
version = "1.19.1"
|
||||||
description = "Japanese OCR"
|
description = "Japanese OCR"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
|||||||
Reference in New Issue
Block a user