From 3b6c6d3f1fb13952205531354690b721776c18dc Mon Sep 17 00:00:00 2001
From: AuroraWright
Date: Sun, 14 Jan 2024 13:40:10 +0100
Subject: [PATCH] Add writing text to a websocket for the modern text hooking pages

---
Review notes (this section is ignored when the patch is applied):
 * WebsocketServerThread.send_text_coroutine iterates over a snapshot of the
   connected clients, because server_handler adds/removes entries on the same
   event loop while a send is being awaited.
 * On "t"/"q" the server's event loop is stopped; previously a second,
   never-started WebsocketServerThread was constructed and the server kept
   running.
 * Line breaks, context-line indentation and blank context lines were
   restored from a copy whose newlines had been collapsed. If a hunk is
   rejected, retry with whitespace-tolerant matching and compare the context
   lines with the target tree.

 README.md        |  1 +
 owocr/run.py     | 48 +++++++++++++++++++++++++++++++++++++++++++++---
 requirements.txt |  1 +
 setup.py         |  1 +
 4 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index c180d21..14de4c9 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ This has been tested with Python 3.11. Newer/older versions might work. For now

 It mostly functions like Manga OCR: https://github.com/kha-white/manga-ocr?tab=readme-ov-file#running-in-the-background
 However:
+- it supports writing text to a websocket when the -w=websocket parameter is specified (port 7331)
 - you can pause/unpause the clipboard image processing by pressing "p" or terminate the script with "t" or "q"
 - you can switch OCR provider with its corresponding keyboard key (refer to the list above). You can also start the script paused with the -p option or with a specific provider with the -e option (refer to `owocr -h` for the list)
 - holding ctrl or cmd at any time will pause the clipboard image processing temporarily
diff --git a/owocr/run.py b/owocr/run.py
index 34d169b..d591fa2 100644
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -8,6 +8,8 @@ from pathlib import Path
 import fire
 import numpy as np
 import pyperclip
+import asyncio
+import websockets
 from PIL import Image
 from PIL import UnidentifiedImageError
 from loguru import logger
@@ -17,6 +19,37 @@ import inspect
 from owocr import *


+class WebsocketServerThread(threading.Thread):
+    def __init__(self):
+        super().__init__()
+        self.daemon = True
+        self.loop = asyncio.new_event_loop()
+        self.connected = set()
+
+    async def send_text_coroutine(self, text):
+        for conn in tuple(self.connected):  # snapshot: the set may change while a send is awaited
+            await conn.send(text)
+
+    def send_text(self, text):
+        return asyncio.run_coroutine_threadsafe(self.send_text_coroutine(text), self.loop)
+
+    async def server_handler(self, websocket):
+        logger.info("Websocket client connected")
+        self.connected.add(websocket)
+        try:
+            async for message in websocket:
+                pass
+        finally:
+            self.connected.remove(websocket)
+
+    def run(self):
+        asyncio.set_event_loop(self.loop)
+        start_server = websockets.serve(self.server_handler, 'localhost', 7331)
+        self.loop.run_until_complete(start_server)
+        self.loop.run_forever()
+        self.loop.close()
+
+
 def are_images_identical(img1, img2):
     if None in (img1, img2):
         return img1 == img2
@@ -34,7 +67,9 @@ def process_and_write_results(engine_instance, engine_color, img_or_path, write_

     logger.opt(ansi=True).info(f"Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}: {text}")

-    if write_to == 'clipboard':
+    if write_to == 'websocket':
+        websocket_server_thread.send_text(text)
+    elif write_to == 'clipboard':
         pyperclip.copy(text)
     else:
         write_to = Path(write_to)
@@ -98,7 +133,7 @@ def run(read_from='clipboard',
     Recognized texts can be either saved to system clipboard, or appended to a text file.

     :param read_from: Specifies where to read input images from. Can be either "clipboard", or a path to a directory.
-    :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", or a path to a text file.
+    :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
     :param delay_secs: How often to check for new images, in seconds.
     :param engine: OCR engine to use. Available: "mangaocr", "gvision", "avision", "azure", "winrtocr", "easyocr", "paddleocr".
     :param pause_at_startup: Pause at startup.
@@ -152,7 +187,7 @@ def run(read_from='clipboard',
     else:
         logger.warning('No config file, defaults will be used')

-    for _,engine_class in sorted(inspect.getmembers(sys.modules[__name__], lambda x: hasattr(x, '__module__') and __package__ in x.__module__ and inspect.isclass(x))):
+    for _,engine_class in sorted(inspect.getmembers(sys.modules[__name__], lambda x: hasattr(x, '__module__') and __package__ + ".ocr" in x.__module__ and inspect.isclass(x))):
         if len(config_engines) == 0 or engine_class.name in config_engines:
             try:
                 engine_instance = engine_class(config[engine_class.name])
@@ -177,6 +212,11 @@ def run(read_from='clipboard',
     user_input_thread = threading.Thread(target=getchar_thread, daemon=True)
     user_input_thread.start()

+    if write_to == 'websocket':
+        global websocket_server_thread
+        websocket_server_thread = WebsocketServerThread()
+        websocket_server_thread.start()
+
     if read_from == 'clipboard':
         from PIL import ImageGrab

@@ -217,6 +257,8 @@ def run(read_from='clipboard',
             if user_input.lower() in 'tq':
                 if read_from == 'clipboard':
                     tmp_paused_listener.stop()
+                if write_to == 'websocket':
+                    websocket_server_thread.loop.call_soon_threadsafe(websocket_server_thread.loop.stop)
                 user_input_thread.join()
                 logger.info('Terminated!')
                 break
diff --git a/requirements.txt b/requirements.txt
index 4b57b68..0a1a402 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,5 @@ Pillow>=10.0.0
 pyperclip
 unidic_lite
 pynput
+websockets
 msvcrt;platform_system=="Windows"
\ No newline at end of file
diff --git a/setup.py b/setup.py
index fd8eba5..d3d54ad 100644
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,7 @@ setup(
         "pyperclip",
         "unidic_lite",
         "pynput",
+        "websockets",
         'msvcrt;platform_system=="Windows"'
     ],
     entry_points={