From ea21bb71b30327c5aa4a4f7603ede3d311721479 Mon Sep 17 00:00:00 2001 From: Mar2ck Date: Sun, 27 Aug 2023 22:36:07 +0100 Subject: [PATCH] Add support for Linux clipboard reading (#44) * Allow clipboard reading on Linux using Pillow 10 * Update README.md to reflect Pillow changes * Update README.md with Linux clipboard requirements * Fix "Using CUDA;Using CPU" console output * Add verbose option to help with debugging * Restore wayland codepath for writing text to clipboard --- README.md | 7 ++++--- manga_ocr/ocr.py | 2 +- manga_ocr/run.py | 22 ++++++++++++---------- requirements.txt | 2 +- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 1d747dd..e6beaae 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ See also: # Installation -You need Python 3.6, 3.7, 3.8 or 3.9. Unfortunately, PyTorch does not support Python 3.10 yet. +You need Python 3.8, 3.9, 3.10 or 3.11. If you want to run with GPU, install PyTorch as described [here](https://pytorch.org/get-started/locally/#start-locally), otherwise this step can be skipped. @@ -66,10 +66,11 @@ text = mocr(img) Manga OCR can run in the background and process new images as they appear. -You might use a tool like [ShareX](https://getsharex.com/) to manually capture a region of the screen and let the +You might use a tool like [ShareX](https://getsharex.com/) or [Flameshot](https://flameshot.org/) to manually capture a region of the screen and let the OCR read it either from the system clipboard, or a specified directory. By default, Manga OCR will write recognized text to clipboard, from which it can be read by a dictionary like [Yomichan](https://github.com/FooSoft/yomichan). -Reading images from clipboard works only on Windows and macOS, on Linux you should read from a directory instead. + +Clipboard mode on Linux requires `wl-copy` for Wayland sessions or `xclip` for X11 sessions. You can find out which one your system needs by running `echo $XDG_SESSION_TYPE` in the terminal. Your full setup for reading manga in Japanese with a dictionary might look like this: diff --git a/manga_ocr/ocr.py b/manga_ocr/ocr.py index b4cc825..9c769d9 100644 --- a/manga_ocr/ocr.py +++ b/manga_ocr/ocr.py @@ -18,7 +18,7 @@ class MangaOcr: if not force_cpu and torch.cuda.is_available(): logger.info('Using CUDA') self.model.cuda() - if not force_cpu and torch.backends.mps.is_available(): + elif not force_cpu and torch.backends.mps.is_available(): logger.info('Using MPS') self.model.to('mps') else: diff --git a/manga_ocr/run.py b/manga_ocr/run.py index 1f7910b..a8bbf39 100644 --- a/manga_ocr/run.py +++ b/manga_ocr/run.py @@ -48,7 +48,8 @@ def run(read_from='clipboard', write_to='clipboard', pretrained_model_name_or_path='kha-white/manga-ocr-base', force_cpu=False, - delay_secs=0.1 + delay_secs=0.1, + verbose=False ): """ Run OCR in the background, waiting for new images to appear either in system clipboard, or a directory. @@ -58,6 +59,7 @@ def run(read_from='clipboard', :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", or a path to a text file. :param pretrained_model_name_or_path: Path to a trained model, either local or from Transformers' model hub. :param force_cpu: If True, OCR will use CPU even if GPU is available. + :param verbose: If True, unhides all warnings. :param delay_secs: How often to check for new images, in seconds. """ @@ -76,12 +78,6 @@ def run(read_from='clipboard', raise NotImplementedError(msg) if read_from == 'clipboard': - - if sys.platform not in ('darwin', 'win32'): - msg = 'Reading images from clipboard works only on macOS and Windows. ' \ - 'On Linux, run "manga_ocr /path/to/screenshot/folder" to read images from a folder instead.' - raise NotImplementedError(msg) - from PIL import ImageGrab logger.info('Reading from clipboard') @@ -91,15 +87,21 @@ def run(read_from='clipboard', try: img = ImageGrab.grabclipboard() - except OSError: - logger.warning('Error while reading from clipboard') + except OSError as error: + if not verbose and "cannot identify image file" in str(error): + # Pillow error when clipboard hasn't changed since last grab (Linux) + pass + elif not verbose and "target image/png not available" in str(error): + # Pillow error when clipboard contains text (Linux, X11) + pass + else: + logger.warning('Error while reading from clipboard ({})'.format(error)) else: if isinstance(img, Image.Image) and not are_images_identical(img, old_img): process_and_write_results(mocr, img, write_to) time.sleep(delay_secs) - else: read_from = Path(read_from) if not read_from.is_dir(): diff --git a/requirements.txt b/requirements.txt index b5098d9..50227a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ fugashi jaconv loguru numpy -Pillow +Pillow>=10.0.0 pyperclip torch>=1.0 transformers>=4.25.0