diff --git a/README.md b/README.md index 41b26f1..992022a 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ However: - for systems where text can be copied to the clipboard at the same time as images, if `*ocr_ignore*` is copied with an image, the image will be ignored - optionally, notifications can be enabled in the config file to show the text with a native OS notification - optionally, you can speed up the online providers by installing fpng-py: `pip install fpng-py` (requires a developer environment on some operating systems/Python versions) +- optionally, you can improve filtering of non-Japanese text for screen capture by installing transformers: `pip install transformers` - idle resource usage on macOS and Windows when reading from the clipboard has been eliminated using native OS polling - a config file (to be created in `user directory/.config/owocr_config.ini`, on Windows `user directory` is the `C:\Users\yourusername` folder) can be used to configure the script, as an example to limit providers (to reduce clutter/memory usage) as well as specifying provider settings such as api keys etc. A sample config file is provided [here](https://raw.githubusercontent.com/AuroraWright/owocr/master/owocr_config.ini) diff --git a/owocr/run.py b/owocr/run.py index a81b1c3..846eaf8 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -19,7 +19,6 @@ from PIL import UnidentifiedImageError from loguru import logger from pynput import keyboard from notifypy import Notify -from pysbd import Segmenter import inspect from owocr.ocr import * @@ -125,6 +124,7 @@ class TextFiltering: accurate_filtering = False def __init__(self): + from pysbd import Segmenter self.segmenter = Segmenter(language='ja', clean=True) try: from transformers import pipeline, AutoTokenizer diff --git a/requirements.txt b/requirements.txt index c68bc8f..3d97d3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,5 +10,6 @@ notify-py mss pywinctl pysbd +langid pywin32;platform_system=='Windows' pyobjc;platform_system=='Darwin' \ No newline at end of file diff --git a/setup.py b/setup.py index 2c254b0..5607cc3 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ long_description = (Path(__file__).parent / "README.md").read_text('utf-8') setup( name="owocr", - version='1.5', + version='1.5.1', description="Japanese OCR", long_description=long_description, long_description_content_type="text/markdown", @@ -31,6 +31,7 @@ setup( "mss", "pywinctl", "pysbd", + "langid", "pywin32;platform_system=='Windows'", "pyobjc;platform_system=='Darwin'" ],