Add langid to requirements and clarification to readme

This commit is contained in:
AuroraWright
2024-02-08 19:56:02 +01:00
parent 7951596ee3
commit 7fc7b497ee
4 changed files with 5 additions and 2 deletions

View File

@@ -32,6 +32,7 @@ However:
- for systems where text can be copied to the clipboard at the same time as images, if `*ocr_ignore*` is copied with an image, the image will be ignored - for systems where text can be copied to the clipboard at the same time as images, if `*ocr_ignore*` is copied with an image, the image will be ignored
- optionally, notifications can be enabled in the config file to show the text with a native OS notification - optionally, notifications can be enabled in the config file to show the text with a native OS notification
- optionally, you can speed up the online providers by installing fpng-py: `pip install fpng-py` (requires a developer environment on some operating systems/Python versions) - optionally, you can speed up the online providers by installing fpng-py: `pip install fpng-py` (requires a developer environment on some operating systems/Python versions)
- optionally, you can improve filtering of non-Japanese text for screen capture by installing transformers: `pip install transformers`
- idle resource usage on macOS and Windows when reading from the clipboard has been eliminated using native OS polling - idle resource usage on macOS and Windows when reading from the clipboard has been eliminated using native OS polling
- a config file (to be created in `user directory/.config/owocr_config.ini`, on Windows `user directory` is the `C:\Users\yourusername` folder) can be used to configure the script, for example to limit providers (to reduce clutter/memory usage) or to specify provider settings such as API keys. A sample config file is provided [here](https://raw.githubusercontent.com/AuroraWright/owocr/master/owocr_config.ini) - a config file (to be created in `user directory/.config/owocr_config.ini`, on Windows `user directory` is the `C:\Users\yourusername` folder) can be used to configure the script, for example to limit providers (to reduce clutter/memory usage) or to specify provider settings such as API keys. A sample config file is provided [here](https://raw.githubusercontent.com/AuroraWright/owocr/master/owocr_config.ini)

View File

@@ -19,7 +19,6 @@ from PIL import UnidentifiedImageError
from loguru import logger from loguru import logger
from pynput import keyboard from pynput import keyboard
from notifypy import Notify from notifypy import Notify
from pysbd import Segmenter
import inspect import inspect
from owocr.ocr import * from owocr.ocr import *
@@ -125,6 +124,7 @@ class TextFiltering:
accurate_filtering = False accurate_filtering = False
def __init__(self): def __init__(self):
from pysbd import Segmenter
self.segmenter = Segmenter(language='ja', clean=True) self.segmenter = Segmenter(language='ja', clean=True)
try: try:
from transformers import pipeline, AutoTokenizer from transformers import pipeline, AutoTokenizer

View File

@@ -10,5 +10,6 @@ notify-py
mss mss
pywinctl pywinctl
pysbd pysbd
langid
pywin32;platform_system=='Windows' pywin32;platform_system=='Windows'
pyobjc;platform_system=='Darwin' pyobjc;platform_system=='Darwin'

View File

@@ -5,7 +5,7 @@ long_description = (Path(__file__).parent / "README.md").read_text('utf-8')
setup( setup(
name="owocr", name="owocr",
version='1.5', version='1.5.1',
description="Japanese OCR", description="Japanese OCR",
long_description=long_description, long_description=long_description,
long_description_content_type="text/markdown", long_description_content_type="text/markdown",
@@ -31,6 +31,7 @@ setup(
"mss", "mss",
"pywinctl", "pywinctl",
"pysbd", "pysbd",
"langid",
"pywin32;platform_system=='Windows'", "pywin32;platform_system=='Windows'",
"pyobjc;platform_system=='Darwin'" "pyobjc;platform_system=='Darwin'"
], ],