Implement reading images via websocket too

This commit is contained in:
AuroraWright
2024-01-14 22:31:54 +01:00
parent 186447333e
commit 5905876acf
2 changed files with 31 additions and 10 deletions

View File

@@ -23,7 +23,7 @@ This has been tested with Python 3.11. Newer/older versions might work. For now
It mostly functions like Manga OCR: https://github.com/kha-white/manga-ocr?tab=readme-ov-file#running-in-the-background
However:
- it supports writing text to a websocket when the -w=websocket parameter is specified (port 7331 by default, configurable in the config file)
- it supports reading images and/or writing text to a websocket when the -r=websocket and/or -w=websocket parameters are specified (port 7331 by default, configurable in the config file)
- you can pause/unpause the clipboard image processing by pressing "p" or terminate the script with "t" or "q"
- you can switch OCR provider with its corresponding keyboard key (refer to the list above). You can also start the script paused with the -p option or with a specific provider with the -e option (refer to `owocr -h` for the list)
- holding ctrl or cmd at any time will pause the clipboard image processing temporarily

View File

@@ -10,6 +10,9 @@ import numpy as np
import pyperclip
import asyncio
import websockets
import queue
import io
from PIL import Image
from PIL import UnidentifiedImageError
from loguru import logger
@@ -20,11 +23,12 @@ from owocr import *
class WebsocketServerThread(threading.Thread):
def __init__(self, port):
def __init__(self, port, read):
super().__init__()
self.daemon = True
self.loop = asyncio.new_event_loop()
self.port = port
self.read = read
self.clients = set()
async def send_text_coroutine(self, text):
@@ -36,7 +40,8 @@ class WebsocketServerThread(threading.Thread):
self.clients.add(websocket)
try:
async for message in websocket:
pass
if self.read:
websocket_queue.put(message)
finally:
self.clients.remove(websocket)
@@ -136,7 +141,7 @@ def run(read_from='clipboard',
Run OCR in the background, waiting for new images to appear either in system clipboard, or a directory.
Recognized texts can be either saved to system clipboard, or appended to a text file.
:param read_from: Specifies where to read input images from. Can be either "clipboard", or a path to a directory.
:param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", or a path to a directory.
:param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
:param delay_secs: How often to check for new images, in seconds.
:param engine: OCR engine to use. Available: "mangaocr", "gvision", "avision", "azure", "winrtocr", "easyocr", "paddleocr".
@@ -239,12 +244,16 @@ def run(read_from='clipboard',
on_release=on_key_release)
tmp_paused_listener.start()
if write_to == 'websocket':
if read_from == 'websocket' or write_to == 'websocket':
global websocket_server_thread
websocket_server_thread = WebsocketServerThread(websocket_port)
websocket_server_thread = WebsocketServerThread(websocket_port, read_from == 'websocket')
websocket_server_thread.start()
if read_from == 'clipboard':
if read_from == 'websocket':
global websocket_queue
websocket_queue = queue.Queue()
logger.opt(ansi=True).info(f"Reading from websocket using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
elif read_from == 'clipboard':
from PIL import ImageGrab
img = None
@@ -273,7 +282,7 @@ def run(read_from='clipboard',
while True:
if user_input != '':
if user_input.lower() in 'tq':
if write_to == 'websocket':
if read_from == 'websocket' or write_to == 'websocket':
websocket_server_thread.stop_server()
websocket_server_thread.join()
user_input_thread.join()
@@ -304,7 +313,17 @@ def run(read_from='clipboard',
user_input = ''
if read_from == 'clipboard':
if read_from == 'websocket':
while True:
try:
item = websocket_queue.get(timeout=delay_secs)
except queue.Empty:
break
else:
if not paused and not tmp_paused:
img = Image.open(io.BytesIO(item))
process_and_write_results(engine_instances[engine_index], engine_color, img, write_to)
elif read_from == 'clipboard':
if not paused and not tmp_paused:
if mac_clipboard_polling:
old_count = count
@@ -333,6 +352,8 @@ def run(read_from='clipboard',
if just_unpaused:
just_unpaused = False
time.sleep(delay_secs)
else:
for path in read_from.iterdir():
if str(path).lower().endswith(allowed_extensions):
@@ -352,7 +373,7 @@ def run(read_from='clipboard',
if delete_images:
Path.unlink(path)
time.sleep(delay_secs)
time.sleep(delay_secs)
if __name__ == '__main__':
fire.Fire(run)