diff --git a/README.md b/README.md index b240ce6..a5ba216 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ This has been tested with Python 3.11. Newer/older versions might work. For now It mostly functions like Manga OCR: https://github.com/kha-white/manga-ocr?tab=readme-ov-file#running-in-the-background However: -- it supports writing text to a websocket when the -w=websocket parameter is specified (port 7331 by default, configurable in the config file) +- it supports reading images and/or writing text to a websocket when the -r=websocket and/or -w=websocket parameters are specified (port 7331 by default, configurable in the config file) - you can pause/unpause the clipboard image processing by pressing "p" or terminate the script with "t" or "q" - you can switch OCR provider with its corresponding keyboard key (refer to the list above). You can also start the script paused with the -p option or with a specific provider with the -e option (refer to `owocr -h` for the list) - holding ctrl or cmd at any time will pause the clipboard image processing temporarily diff --git a/owocr/run.py b/owocr/run.py index 31811fd..d347489 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -10,6 +10,9 @@ import numpy as np import pyperclip import asyncio import websockets +import queue +import io + from PIL import Image from PIL import UnidentifiedImageError from loguru import logger @@ -20,11 +23,12 @@ from owocr import * class WebsocketServerThread(threading.Thread): - def __init__(self, port): + def __init__(self, port, read): super().__init__() self.daemon = True self.loop = asyncio.new_event_loop() self.port = port + self.read = read self.clients = set() async def send_text_coroutine(self, text): @@ -36,7 +40,8 @@ class WebsocketServerThread(threading.Thread): self.clients.add(websocket) try: async for message in websocket: - pass + if self.read: + websocket_queue.put(message) finally: self.clients.remove(websocket) @@ -136,7 +141,7 @@ def run(read_from='clipboard', Run OCR in the background, waiting for new images to appear either in system clipboard, or a directory. Recognized texts can be either saved to system clipboard, or appended to a text file. - :param read_from: Specifies where to read input images from. Can be either "clipboard", or a path to a directory. + :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", or a path to a directory. :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file. :param delay_secs: How often to check for new images, in seconds. :param engine: OCR engine to use. Available: "mangaocr", "gvision", "avision", "azure", "winrtocr", "easyocr", "paddleocr". @@ -239,12 +244,16 @@ def run(read_from='clipboard', on_release=on_key_release) tmp_paused_listener.start() - if write_to == 'websocket': + if read_from == 'websocket' or write_to == 'websocket': global websocket_server_thread - websocket_server_thread = WebsocketServerThread(websocket_port) + websocket_server_thread = WebsocketServerThread(websocket_port, read_from == 'websocket') websocket_server_thread.start() - if read_from == 'clipboard': + if read_from == 'websocket': + global websocket_queue + websocket_queue = queue.Queue() + logger.opt(ansi=True).info(f"Reading from websocket using <{engine_color}>{engine_instances[engine_index].readable_name}{' (paused)' if paused else ''}") + elif read_from == 'clipboard': from PIL import ImageGrab img = None @@ -273,7 +282,7 @@ def run(read_from='clipboard', while True: if user_input != '': if user_input.lower() in 'tq': - if write_to == 'websocket': + if read_from == 'websocket' or write_to == 'websocket': websocket_server_thread.stop_server() websocket_server_thread.join() user_input_thread.join() @@ -304,7 +313,17 @@ def run(read_from='clipboard', user_input = '' - if read_from == 'clipboard': + if read_from == 'websocket': + while True: + try: + item = websocket_queue.get(timeout=delay_secs) + except queue.Empty: + break + else: + if not paused and not tmp_paused: + img = Image.open(io.BytesIO(item)) + process_and_write_results(engine_instances[engine_index], engine_color, img, write_to) + elif read_from == 'clipboard': if not paused and not tmp_paused: if mac_clipboard_polling: old_count = count @@ -333,6 +352,8 @@ def run(read_from='clipboard', if just_unpaused: just_unpaused = False + + time.sleep(delay_secs) else: for path in read_from.iterdir(): if str(path).lower().endswith(allowed_extensions): @@ -352,7 +373,7 @@ def run(read_from='clipboard', if delete_images: Path.unlink(path) - time.sleep(delay_secs) + time.sleep(delay_secs) if __name__ == '__main__': fire.Fire(run)