diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..24b8140
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,140 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+.idea/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..30404ce
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+TODO
\ No newline at end of file
diff --git a/assets/crop.png b/assets/crop.png
new file mode 100644
index 0000000..b6ba3a3
Binary files /dev/null and b/assets/crop.png differ
diff --git a/manga_ocr/__init__.py b/manga_ocr/__init__.py
new file mode 100644
index 0000000..65120cf
--- /dev/null
+++ b/manga_ocr/__init__.py
@@ -0,0 +1 @@
+from manga_ocr.ocr import MangaOcr
diff --git a/manga_ocr/ocr.py b/manga_ocr/ocr.py
new file mode 100644
index 0000000..c350d7f
--- /dev/null
+++ b/manga_ocr/ocr.py
@@ -0,0 +1,55 @@
+import re
+from pathlib import Path
+
+import jaconv
+import torch
+from PIL import Image
+from loguru import logger
+from transformers import AutoFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel
+
+
+class MangaOcr:
+    def __init__(self, pretrained_model_name_or_path='kha-white/manga-ocr-base', force_cpu=False):
+        logger.info(f'Loading OCR model from {pretrained_model_name_or_path}')
+        self.feature_extractor = AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path)
+        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
+        self.model = VisionEncoderDecoderModel.from_pretrained(pretrained_model_name_or_path)
+
+        if not force_cpu and torch.cuda.is_available():
+            logger.info('Using CUDA')
+            self.model.cuda()
+        else:
+            logger.info('Using CPU')
+
+        self(Path(__file__).parent.parent / 'assets/crop.png')
+
+        logger.info('OCR ready')
+
+    def __call__(self, img_or_path):
+        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+            img = Image.open(img_or_path)
+        elif isinstance(img_or_path, Image.Image):
+            img = img_or_path
+        else:
+            raise ValueError(f'Invalid value of img_or_path: {img_or_path}')
+
+        img = img.convert('L').convert('RGB')
+
+        x = self._preprocess(img)
+        x = self.model.generate(x[None].to(self.model.device))[0].cpu()
+        x = self.tokenizer.decode(x, skip_special_tokens=True)
+        x = post_process(x)
+        return x
+
+    def _preprocess(self, img):
+        pixel_values = self.feature_extractor(img, return_tensors="pt").pixel_values
+        return pixel_values.squeeze()
+
+
+def post_process(text):
+    text = ''.join(text.split())
+    text = text.replace('…', '...')
+    text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
+    text = jaconv.h2z(text, ascii=True, digit=True)
+
+    return text
diff --git a/manga_ocr/run.py b/manga_ocr/run.py
new file mode 100644
index 0000000..3362242
--- /dev/null
+++ b/manga_ocr/run.py
@@ -0,0 +1,96 @@
+import time
+from pathlib import Path
+
+import PIL.Image
+import PIL.ImageGrab
+import fire
+import numpy as np
+import pyperclip
+from loguru import logger
+
+from manga_ocr import MangaOcr
+
+
+def are_images_identical(img1, img2):
+    if None in (img1, img2):
+        return img1 == img2
+
+    img1 = np.array(img1)
+    img2 = np.array(img2)
+
+    return (img1.shape == img2.shape) and (img1 == img2).all()
+
+
+def process_and_write_results(mocr, img_or_path, write_to):
+    t0 = time.time()
+    text = mocr(img_or_path)
+    t1 = time.time()
+
+    logger.info(f'Text recognized in {t1 - t0:0.03f} s: {text}')
+
+    if write_to == 'clipboard':
+        pyperclip.copy(text)
+    else:
+        write_to = Path(write_to)
+        if write_to.suffix != '.txt':
+            raise ValueError('write_to must be either "clipboard" or a path to a text file')
+
+        with write_to.open('a') as f:
+            f.write(text + '\n')
+
+
+def run(
+        read_from='clipboard',
+        write_to='clipboard',
+        pretrained_model_name_or_path='kha-white/manga-ocr-base',
+        force_cpu=False,
+        delay_secs=0.1,
+):
+    mocr = MangaOcr(pretrained_model_name_or_path, force_cpu)
+
+    if read_from == 'clipboard':
+        logger.info('Reading from clipboard')
+
+        img = None
+        while True:
+            old_img = img
+
+            try:
+                img = PIL.ImageGrab.grabclipboard()
+            except OSError:
+                logger.warning('Error while reading from clipboard')
+            else:
+                if isinstance(img, PIL.Image.Image) and not are_images_identical(img, old_img):
+                    process_and_write_results(mocr, img, write_to)
+
+            time.sleep(delay_secs)
+
+
+    else:
+        read_from = Path(read_from)
+        if not read_from.is_dir():
+            raise ValueError('read_from must be either "clipboard" or a path to a directory')
+
+        logger.info(f'Reading from directory {read_from}')
+
+        old_paths = set()
+        for path in read_from.iterdir():
+            old_paths.add(path)
+
+        while True:
+            for path in read_from.iterdir():
+                if path not in old_paths:
+                    old_paths.add(path)
+
+                    try:
+                        img = PIL.Image.open(path)
+                    except PIL.UnidentifiedImageError:
+                        logger.warning(f'Error while reading file {path}')
+                    else:
+                        process_and_write_results(mocr, img, write_to)
+
+            time.sleep(0.5)
+
+
+if __name__ == '__main__':
+    fire.Fire(run)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2cdbb57
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+fire
+jaconv
+loguru
+numpy
+Pillow
+pyperclip
+torch
+transformers>=4.12.5
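A short usage sketch may help readers of this patch. It is only an illustration based on the code added in `manga_ocr/__init__.py` and `manga_ocr/ocr.py`; the image filename is hypothetical, and the first call downloads the `kha-white/manga-ocr-base` weights via `from_pretrained`.

```python
from PIL import Image

from manga_ocr import MangaOcr

# Loads the model and runs a warm-up pass on assets/crop.png (see MangaOcr.__init__).
mocr = MangaOcr()

# __call__ accepts a str or pathlib.Path pointing at an image file...
text = mocr('speech_bubble.png')  # hypothetical file name

# ...or an already-loaded PIL image.
text = mocr(Image.open('speech_bubble.png'))
print(text)
```

`manga_ocr/run.py` wraps the same class in a `fire`-based entry point that either watches the clipboard or polls a directory for new images, and writes each recognized string to the clipboard or appends it to a `.txt` file.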
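For reference, `post_process` strips whitespace, rewrites `…` as `...`, collapses runs of dot-like characters, and converts half-width characters to full-width with `jaconv.h2z`. Below is a minimal sketch of just the regex step, with a made-up input string, so its behaviour can be checked without loading the model.

```python
import re


def collapse_dots(text):
    # Same substitution as in post_process: a run of two or more '・' or '.'
    # characters becomes an equal number of ASCII periods.
    return re.sub('[・.]{2,}', lambda m: (m.end() - m.start()) * '.', text)


print(collapse_dots('そうか・・・'))  # -> そうか...
```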