Big refactoring, allow second image source

This commit is contained in:
AuroraWright
2025-05-04 08:37:54 +02:00
parent 7a307f4cb9
commit e48f388755
3 changed files with 252 additions and 259 deletions

View File

@@ -12,6 +12,8 @@ parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.SUPPRESS,
                    help='Optional secondary source to read input images from, used in addition to --read_from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
@@ -47,6 +49,7 @@ class Config:
__engine_config = {}
__default_config = {
'read_from': 'clipboard',
'read_from_secondary': None,
'write_to': 'clipboard',
'engine': '',
'pause_at_startup': False,

View File

@@ -96,6 +96,22 @@ def post_process(text):
return text
def input_to_pil_image(img):
    """Normalize a supported input into a PIL image.

    Accepts an already-open PIL.Image.Image, raw encoded bytes/bytearray,
    or a pathlib.Path to an image file.

    Returns the PIL image, or None when the data cannot be decoded as an
    image (callers treat None as "invalid image"). Raises ValueError for
    any other input type.
    """
    if isinstance(img, Image.Image):
        pil_image = img
    elif isinstance(img, (bytes, bytearray)):
        # Mirror the Path branch: undecodable bytes yield None instead of
        # propagating an exception, since every caller checks for None.
        try:
            pil_image = Image.open(io.BytesIO(img))
        except (UnidentifiedImageError, OSError):
            return None
    elif isinstance(img, Path):
        try:
            pil_image = Image.open(img)
            # Force full decoding now so corrupt files fail here, not later.
            pil_image.load()
        except (UnidentifiedImageError, OSError):
            return None
    else:
        raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
    return pil_image
def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
if img_format == 'png' and optimized_png_encode and not optimize:
raw_data = img.convert('RGBA').tobytes()
@@ -157,15 +173,14 @@ class MangaOcr:
self.available = True
logger.info('Manga OCR ready')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
x = (True, self.model(img))
img.close()
return x
class GoogleVision:
@@ -188,13 +203,10 @@ class GoogleVision:
except:
logger.warning('Error parsing Google credentials, Google Vision will not work!')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
image_bytes = self._preprocess(img)
image = vision.Image(content=image_bytes)
@@ -207,6 +219,8 @@ class GoogleVision:
texts = response.text_annotations
res = texts[0].description if len(texts) > 0 else ''
x = (True, res)
img.close()
return x
def _preprocess(self, img):
@@ -225,13 +239,10 @@ class GoogleLens:
self.available = True
logger.info('Google Lens ready')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
request = LensOverlayServerRequest()
@@ -298,6 +309,8 @@ class GoogleLens:
res += '\n'
x = (True, res)
img.close()
return x
def _preprocess(self, img):
@@ -305,9 +318,10 @@ class GoogleLens:
aspect_ratio = img.width / img.height
new_w = int(sqrt(3000000 * aspect_ratio))
new_h = int(new_w / aspect_ratio)
img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img.close()
return (pil_image_to_bytes(img), img.width, img.height)
return (pil_image_to_bytes(img_resized), img_resized.width, img_resized.height)
class GoogleLensWeb:
name = 'glensweb'
@@ -323,13 +337,10 @@ class GoogleLensWeb:
self.available = True
logger.info('Google Lens (web) ready')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
url = 'https://lens.google.com/v3/upload'
files = {'encoded_image': ('image.png', self._preprocess(img), 'image/png')}
@@ -393,6 +404,8 @@ class GoogleLensWeb:
res += '\n'
x = (True, res)
img.close()
return x
def _preprocess(self, img):
@@ -400,9 +413,10 @@ class GoogleLensWeb:
aspect_ratio = img.width / img.height
new_w = int(sqrt(3000000 * aspect_ratio))
new_h = int(new_w / aspect_ratio)
img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img.close()
return pil_image_to_bytes(img)
return pil_image_to_bytes(img_resized)
class Bing:
name = 'bing'
@@ -415,13 +429,10 @@ class Bing:
self.available = True
logger.info('Bing ready')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
img_bytes = self._preprocess(img)
if not img_bytes:
@@ -515,6 +526,8 @@ class Bing:
res += line['text'] + '\n'
x = (True, res)
img.close()
return x
def _preprocess(self, img):
@@ -526,9 +539,10 @@ class Bing:
resize_factor = max(max_pixel_size / img.width, max_pixel_size / img.height)
new_w = int(img.width * resize_factor)
new_h = int(img.height * resize_factor)
img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img.close()
img_bytes, _ = limit_image_size(img, max_byte_size)
img_bytes, _ = limit_image_size(img_resized, max_byte_size)
if img_bytes:
res = base64.b64encode(img_bytes).decode('utf-8')
@@ -550,13 +564,10 @@ class AppleVision:
self.available = True
logger.info('Apple Vision ready')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
with objc.autorelease_pool():
req = Vision.VNRecognizeTextRequest.alloc().init()
@@ -579,6 +590,7 @@ class AppleVision:
else:
x = (False, 'Unknown error!')
img.close()
return x
def _preprocess(self, img):
@@ -631,13 +643,10 @@ class AppleLiveText:
self.available = True
logger.info('Apple Live Text ready')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
with objc.autorelease_pool():
analyzer = self.VKCImageAnalyzer.alloc().init()
@@ -691,13 +700,10 @@ class WinRTOCR:
except:
logger.warning('Error reading URL from config, WinRT OCR will not work!')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
if sys.platform == 'win32':
res = winocr.recognize_pil_sync(img, lang='ja')['text']
@@ -716,6 +722,8 @@ class WinRTOCR:
res = res.json()['text']
x = (True, res)
img.close()
return x
def _preprocess(self, img):
@@ -749,13 +757,10 @@ class OneOCR:
except:
logger.warning('Error reading URL from config, OneOCR will not work!')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
if sys.platform == 'win32':
try:
@@ -776,6 +781,8 @@ class OneOCR:
res = res.json()['text']
x = (True, res)
img.close()
return x
def _preprocess(self, img):
@@ -799,13 +806,10 @@ class AzureImageAnalysis:
except:
logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
try:
read_result = self.client.analyze(image_data=self._preprocess(img), visual_features=[VisualFeatures.READ])
@@ -823,6 +827,8 @@ class AzureImageAnalysis:
return (False, 'Unknown error!')
x = (True, res)
img.close()
return x
def _preprocess(self, img):
@@ -830,9 +836,10 @@ class AzureImageAnalysis:
resize_factor = max(50 / img.width, 50 / img.height)
new_w = int(img.width * resize_factor)
new_h = int(img.height * resize_factor)
img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img.close()
return pil_image_to_bytes(img)
return pil_image_to_bytes(img_resized)
class EasyOCR:
name = 'easyocr'
@@ -850,13 +857,10 @@ class EasyOCR:
self.available = True
logger.info('EasyOCR ready')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
res = ''
read_result = self.model.readtext(self._preprocess(img), detail=0)
@@ -864,6 +868,8 @@ class EasyOCR:
res += text + '\n'
x = (True, res)
img.close()
return x
def _preprocess(self, img):
@@ -897,13 +903,10 @@ class RapidOCR:
self.available = True
logger.info('RapidOCR ready')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
res = ''
read_results, elapsed = self.model(self._preprocess(img))
@@ -912,6 +915,8 @@ class RapidOCR:
res += read_result[1] + '\n'
x = (True, res)
img.close()
return x
def _preprocess(self, img):
@@ -932,13 +937,10 @@ class OCRSpace:
except:
logger.warning('Error reading API key from config, OCRSpace will not work!')
def __call__(self, img_or_path):
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
img = Image.open(img_or_path)
elif isinstance(img_or_path, Image.Image):
img = img_or_path
else:
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
def __call__(self, img):
img = input_to_pil_image(img)
if not img:
return (False, 'Invalid image provided')
img_bytes, img_extension = self._preprocess(img)
if not img_bytes:
@@ -969,6 +971,8 @@ class OCRSpace:
res = res['ParsedResults'][0]['ParsedText']
x = (True, res)
img.close()
return x
def _preprocess(self, img):

View File

@@ -93,7 +93,7 @@ class ClipboardThread(threading.Thread):
1.0
)
return new_image.TIFFRepresentation()
return bytes(new_image.TIFFRepresentation())
def process_message(self, hwnd: int, msg: int, wparam: int, lparam: int):
WM_CLIPBOARDUPDATE = 0x031D
@@ -114,7 +114,7 @@ class ClipboardThread(threading.Thread):
clipboard_text = win32clipboard.GetClipboardData(win32clipboard.CF_UNICODETEXT)
if self.ignore_flag or clipboard_text != '*ocr_ignore*':
img = win32clipboard.GetClipboardData(win32clipboard.CF_DIB)
clipboard_queue.put(img)
image_queue.put((img, False))
win32clipboard.CloseClipboard()
except pywintypes.error:
pass
@@ -165,7 +165,7 @@ class ClipboardThread(threading.Thread):
clipboard_text = pasteboard.stringForType_(NSPasteboardTypeString)
if self.ignore_flag or clipboard_text != '*ocr_ignore*':
img = self.normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF))
clipboard_queue.put(img)
image_queue.put((img, False))
else:
old_img = img
try:
@@ -176,7 +176,7 @@ class ClipboardThread(threading.Thread):
if ((not just_unpaused) and isinstance(img, Image.Image) and \
(self.ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and \
(not self.are_images_identical(img, old_img))):
clipboard_queue.put(img)
image_queue.put((img, False))
just_unpaused = False
@@ -184,6 +184,41 @@ class ClipboardThread(threading.Thread):
time.sleep(sleep_time)
class DirectoryWatcher(threading.Thread):
    """Daemon thread that polls a directory and enqueues newly appearing image files.

    Files present when the thread starts are only recorded, not queued, so
    OCR runs exclusively on images added after startup.
    """

    def __init__(self, path):
        super().__init__(daemon=True)
        self.path = path
        self.delay_secs = config.get_general('delay_secs')
        self.last_update = time.time()
        self.allowed_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')

    def get_path_key(self, path):
        # Identify a file by (path, mtime) so a rewritten file is seen as new.
        return path, path.lstat().st_mtime

    def run(self):
        old_paths = set()
        # Seed with pre-existing files so only subsequently added images are OCRed.
        # BUG FIX: this previously called the module-level get_path_key(), which
        # was removed in this refactoring -- use the method instead.
        for path in self.path.iterdir():
            if path.suffix.lower() in self.allowed_extensions:
                old_paths.add(self.get_path_key(path))
        while not terminated:
            if paused:
                sleep_time = 0.5
            else:
                sleep_time = self.delay_secs
            # Keep scanning while paused so files added then are tracked but
            # not queued (see the inner `if not paused` guard).
            for path in self.path.iterdir():
                if path.suffix.lower() in self.allowed_extensions:
                    path_key = self.get_path_key(path)
                    if path_key not in old_paths:
                        old_paths.add(path_key)
                        if not paused:
                            image_queue.put((path, False))
            if not terminated:
                time.sleep(sleep_time)
class WebsocketServerThread(threading.Thread):
def __init__(self, read):
super().__init__(daemon=True)
@@ -206,7 +241,7 @@ class WebsocketServerThread(threading.Thread):
try:
async for message in websocket:
if self.read and not paused:
websocket_queue.put(message)
image_queue.put((message, False))
try:
await websocket.send('True')
except websockets.exceptions.ConnectionClosedOK:
@@ -255,7 +290,7 @@ class RequestHandler(socketserver.BaseRequestHandler):
pass
if not paused:
unixsocket_queue.put(img)
image_queue.put((img, False))
conn.sendall(b'True')
else:
conn.sendall(b'False')
@@ -266,7 +301,7 @@ class MacOSWindowTracker(threading.Thread):
super().__init__(daemon=True)
self.stop = False
self.window_id = window_id
self.window_active = False
self.window_active = screencapture_window_active
def run(self):
found = True
@@ -302,8 +337,8 @@ class WindowsWindowTracker(threading.Thread):
self.stop = False
self.window_handle = window_handle
self.only_active = only_active
self.window_active = False
self.window_minimized = False
self.window_active = screencapture_window_active
self.window_minimized = not screencapture_window_visible
def run(self):
found = True
@@ -463,14 +498,8 @@ class TextFiltering:
class ScreenshotClass:
def __init__(self, screen_capture_on_combo):
def __init__(self):
screen_capture_area = config.get_general('screen_capture_area')
if type(screen_capture_area) == tuple:
screen_capture_area = ','.join(map(str, screen_capture_area))
global screencapture_window_active
global screencapture_window_visible
screencapture_window_active = True
screencapture_window_visible = True
self.macos_window_tracker = None
self.windows_window_tracker = None
if screen_capture_area == '':
@@ -519,7 +548,7 @@ class ScreenshotClass:
self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
else:
screen_capture_only_active_windows = (not screen_capture_on_combo) and config.get_general('screen_capture_only_active_windows')
screen_capture_only_active_windows = config.get_general('screen_capture_only_active_windows')
area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name'
if sys.platform == 'darwin':
if int(platform.mac_ver()[0].split('.')[0]) < 14:
@@ -554,7 +583,6 @@ class ScreenshotClass:
window_title = window_titles[window_index]
if screen_capture_only_active_windows:
screencapture_window_active = False
self.macos_window_tracker = MacOSWindowTracker(self.window_id)
self.macos_window_tracker.start()
logger.opt(ansi=True).info(f'Selected window: {window_title}')
@@ -566,8 +594,6 @@ class ScreenshotClass:
ctypes.windll.shcore.SetProcessDpiAwareness(1)
if screen_capture_only_active_windows:
screencapture_window_active = False
self.windows_window_tracker = WindowsWindowTracker(self.window_handle, screen_capture_only_active_windows)
self.windows_window_tracker.start()
logger.opt(ansi=True).info(f'Selected window: {window_title}')
@@ -595,7 +621,7 @@ class ScreenshotClass:
except queue.Empty:
cg_image = None
if not cg_image:
return None
return 0
width = CGImageGetWidth(cg_image)
height = CGImageGetHeight(cg_image)
raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
@@ -620,7 +646,7 @@ class ScreenshotClass:
bmpinfo = save_bitmap.GetInfo()
bmpstr = save_bitmap.GetBitmapBits(True)
except pywintypes.error:
return None
return 0
img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
try:
win32gui.DeleteObject(save_bitmap.GetHandle())
@@ -651,6 +677,9 @@ class AutopauseTimer:
self.timeout = timeout
self.timer_thread = None
def __del__(self):
self.stop()
def start(self):
self.stop()
self.stop_event.clear()
@@ -664,12 +693,12 @@ class AutopauseTimer:
def _countdown(self):
seconds = self.timeout
while seconds > 0 and not self.stop_event.is_set():
while seconds > 0 and not self.stop_event.is_set() and not terminated:
time.sleep(1)
seconds -= 1
if not self.stop_event.is_set():
self.stop_event.set()
if not paused:
if not (paused or terminated):
pause_handler(True)
@@ -775,7 +804,7 @@ def on_window_minimized(minimized):
def on_screenshot_combo():
if not paused:
img = take_screenshot()
screenshot_queue.put(img)
image_queue.put((img, True))
def process_and_write_results(img_or_path, last_result, filtering):
@@ -783,9 +812,9 @@ def process_and_write_results(img_or_path, last_result, filtering):
auto_pause_handler.stop()
engine_instance = engine_instances[engine_index]
t0 = time.time()
start_time = time.time()
res, text = engine_instance(img_or_path)
t1 = time.time()
end_time = time.time()
orig_text = []
engine_color = config.get_general('engine_color')
@@ -793,7 +822,7 @@ def process_and_write_results(img_or_path, last_result, filtering):
if filtering:
text, orig_text = filtering(text, last_result)
text = post_process(text)
logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
if config.get_general('notifications'):
notifier.send(title='owocr', message='Text recognized: ' + text)
@@ -809,15 +838,11 @@ def process_and_write_results(img_or_path, last_result, filtering):
if auto_pause_handler and not paused:
auto_pause_handler.start()
else:
logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {t1 - t0:0.03f}s: {text}')
logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {text}')
return orig_text
def get_path_key(path):
    """Return a (path, mtime) pair identifying this file snapshot."""
    stat_info = path.lstat()
    return path, stat_info.st_mtime
def run():
logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
@@ -861,22 +886,33 @@ def run():
global paused
global notifier
global auto_pause_handler
global websocket_server_thread
global image_queue
non_path_inputs = ('screencapture', 'clipboard', 'websocket', 'unixsocket')
read_from = config.get_general('read_from')
read_from_secondary = config.get_general('read_from_secondary')
read_from_path = None
read_from_readable = []
write_to = config.get_general('write_to')
terminated = False
paused = config.get_general('pause_at_startup')
auto_pause = config.get_general('auto_pause')
clipboard_thread = None
websocket_server_thread = None
directory_watcher_thread = None
unix_socket_server = None
key_combo_listener = None
filtering = None
auto_pause_handler = None
engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
engine_color = config.get_general('engine_color')
combo_pause = config.get_general('combo_pause')
combo_engine_switch = config.get_general('combo_engine_switch')
screen_capture_on_combo = False
notifier = DesktopNotifierSync()
image_queue = queue.Queue()
key_combos = {}
if read_from != 'screencapture' and auto_pause != 0:
auto_pause_handler = AutopauseTimer(auto_pause)
if combo_pause != '':
key_combos[combo_pause] = pause_handler
if combo_engine_switch != '':
@@ -885,64 +921,51 @@ def run():
else:
raise ValueError('combo_pause must also be specified')
if read_from == 'websocket' or write_to == 'websocket':
global websocket_server_thread
websocket_server_thread = WebsocketServerThread(read_from == 'websocket')
if 'websocket' in (read_from, read_from_secondary) or write_to == 'websocket':
websocket_server_thread = WebsocketServerThread('websocket' in (read_from, read_from_secondary))
websocket_server_thread.start()
if read_from == 'websocket':
global websocket_queue
websocket_queue = queue.Queue()
read_from_readable = 'websocket'
elif read_from == 'unixsocket':
if 'screencapture' in (read_from, read_from_secondary):
global screencapture_window_active
global screencapture_window_visible
global take_screenshot
screencapture_window_active = False
screencapture_window_visible = True
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
screen_capture_combo = config.get_general('screen_capture_combo')
last_screenshot_time = 0
last_result = ([], engine_index)
if screen_capture_combo != '':
screen_capture_on_combo = True
key_combos[screen_capture_combo] = on_screenshot_combo
take_screenshot = ScreenshotClass()
filtering = TextFiltering()
read_from_readable.append('screen capture')
if 'websocket' in (read_from, read_from_secondary):
read_from_readable.append('websocket')
if 'unixsocket' in (read_from, read_from_secondary):
if sys.platform == 'win32':
raise ValueError('"unixsocket" is not currently supported on Windows')
global unixsocket_queue
unixsocket_queue = queue.Queue()
socket_path = Path('/tmp/owocr.sock')
if socket_path.exists():
socket_path.unlink()
unix_socket_server = socketserver.ThreadingUnixStreamServer(str(socket_path), RequestHandler)
unix_socket_server_thread = threading.Thread(target=unix_socket_server.serve_forever, daemon=True)
unix_socket_server_thread.start()
read_from_readable = 'unix socket'
elif read_from == 'clipboard':
global clipboard_queue
clipboard_queue = queue.Queue()
read_from_readable.append('unix socket')
if 'clipboard' in (read_from, read_from_secondary):
clipboard_thread = ClipboardThread()
clipboard_thread.start()
read_from_readable = 'clipboard'
elif read_from == 'screencapture':
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
screen_capture_combo = config.get_general('screen_capture_combo')
last_result = ([], engine_index)
if screen_capture_combo != '':
screen_capture_on_combo = True
global screenshot_queue
screenshot_queue = queue.Queue()
key_combos[screen_capture_combo] = on_screenshot_combo
else:
screen_capture_on_combo = False
global take_screenshot
take_screenshot = ScreenshotClass(screen_capture_on_combo)
filtering = TextFiltering()
read_from_readable = 'screen capture'
else:
delay_secs = config.get_general('delay_secs')
read_from_readable.append('clipboard')
if any(i and i not in non_path_inputs for i in (read_from, read_from_secondary)):
if all(i and i not in non_path_inputs for i in (read_from, read_from_secondary)):
raise ValueError("read_from and read_from_secondary can't both be directory paths")
delete_images = config.get_general('delete_images')
read_from = Path(read_from)
if not read_from.is_dir():
raise ValueError('read_from must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
allowed_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')
old_paths = set()
for path in read_from.iterdir():
if path.suffix.lower() in allowed_extensions:
old_paths.add(get_path_key(path))
read_from_readable = f'directory {read_from}'
read_from_path = Path(read_from) if read_from not in non_path_inputs else Path(read_from_secondary)
if not read_from_path.is_dir():
raise ValueError('read_from and read_from_secondary must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
directory_watcher_thread = DirectoryWatcher(read_from_path)
directory_watcher_thread.start()
read_from_readable.append(f'directory {read_from_path}')
if len(key_combos) > 0:
key_combo_listener = keyboard.GlobalHotKeys(key_combos)
@@ -955,99 +978,62 @@ def run():
raise ValueError('write_to must be either "websocket", "clipboard" or a path to a text file')
write_to_readable = f'file {write_to}'
process_queue = (any(i in ('clipboard', 'websocket', 'unixsocket') for i in (read_from, read_from_secondary)) or read_from_path or screen_capture_on_combo)
process_screenshots = 'screencapture' in (read_from, read_from_secondary) and not screen_capture_on_combo
signal.signal(signal.SIGINT, signal_handler)
if (not process_screenshots) and auto_pause != 0:
auto_pause_handler = AutopauseTimer(auto_pause)
user_input_thread = threading.Thread(target=user_input_thread_run, daemon=True)
user_input_thread.start()
logger.opt(ansi=True).info(f"Reading from {read_from_readable}, writing to {write_to_readable} using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
logger.opt(ansi=True).info(f"Reading from {' and '.join(read_from_readable)}, writing to {write_to_readable} using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
while not terminated:
sleep_time = 0
if read_from == 'websocket':
while True:
try:
item = websocket_queue.get(timeout=0.5)
except queue.Empty:
break
else:
if not paused:
img = Image.open(io.BytesIO(item))
process_and_write_results(img, None, None)
elif read_from == 'unixsocket':
while True:
try:
item = unixsocket_queue.get(timeout=0.5)
except queue.Empty:
break
else:
img = Image.open(io.BytesIO(item))
process_and_write_results(img, None, None)
elif read_from == 'clipboard':
while True:
try:
item = clipboard_queue.get(timeout=0.5)
except queue.Empty:
break
else:
img = item if isinstance(item, Image.Image) else Image.open(io.BytesIO(item))
process_and_write_results(img, None, None)
elif read_from == 'screencapture':
img = None
if screen_capture_on_combo:
try:
img = screenshot_queue.get(timeout=0.5)
except queue.Empty:
pass
else:
if not img:
on_window_closed(False)
terminated = True
break
else:
sleep_time = 0.5
if (not paused) and screencapture_window_active and screencapture_window_visible:
img = take_screenshot()
if not img:
on_window_closed(False)
terminated = True
break
sleep_time = screen_capture_delay_secs
if img:
start_time = time.time()
img = None
filter_img = False
if process_queue:
try:
img, filter_img = image_queue.get(timeout=0.1)
except queue.Empty:
pass
if (not img) and process_screenshots:
if (not paused) and screencapture_window_active and screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
img = take_screenshot()
filter_img = True
last_screenshot_time = time.time()
if img == 0:
on_window_closed(False)
terminated = True
break
elif img:
if filter_img:
res = process_and_write_results(img, last_result, filtering)
if res:
last_result = (res, engine_index)
else:
sleep_time = delay_secs
for path in read_from.iterdir():
if path.suffix.lower() in allowed_extensions:
path_key = get_path_key(path)
if path_key not in old_paths:
old_paths.add(path_key)
else:
process_and_write_results(img, None, None)
if isinstance(img, Path):
if delete_images:
Path.unlink(img)
if not paused:
try:
img = Image.open(path)
img.load()
except (UnidentifiedImageError, OSError) as e:
logger.warning(f'Error while reading file {path}: {e}')
else:
process_and_write_results(img, None, None)
img.close()
if delete_images:
Path.unlink(path)
if not terminated:
time.sleep(sleep_time)
elapsed_time = time.time() - start_time
if (not terminated) and elapsed_time < 0.1:
time.sleep(0.1 - elapsed_time)
if read_from == 'websocket' or write_to == 'websocket':
if websocket_server_thread:
websocket_server_thread.stop_server()
websocket_server_thread.join()
if read_from == 'clipboard':
if clipboard_thread:
if sys.platform == 'win32':
win32api.PostThreadMessage(clipboard_thread.thread_id, win32con.WM_QUIT, 0, 0)
clipboard_thread.join()
elif read_from == 'unixsocket':
if directory_watcher_thread:
directory_watcher_thread.join()
if unix_socket_server:
unix_socket_server.shutdown()
unix_socket_server_thread.join()
if len(key_combos) > 0:
if key_combo_listener:
key_combo_listener.stop()
if auto_pause_handler:
auto_pause_handler.stop()