Fix Linux crashes

AuroraWright
2025-06-16 16:41:08 +02:00
parent 29b1be10a3
commit 2cb828d1da
3 changed files with 139 additions and 114 deletions

View File

@@ -97,11 +97,13 @@ def post_process(text):
 def input_to_pil_image(img):
+    is_path = False
     if isinstance(img, Image.Image):
         pil_image = img
     elif isinstance(img, (bytes, bytearray)):
         pil_image = Image.open(io.BytesIO(img))
     elif isinstance(img, Path):
+        is_path = True
         try:
             pil_image = Image.open(img)
             pil_image.load()
@@ -109,7 +111,7 @@ def input_to_pil_image(img):
             return None
     else:
         raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
-    return pil_image
+    return pil_image, is_path
 def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
@@ -174,12 +176,13 @@ class MangaOcr:
         logger.info('Manga OCR ready')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
         x = (True, self.model(img))
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -204,7 +207,7 @@ class GoogleVision:
             logger.warning('Error parsing Google credentials, Google Vision will not work!')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -220,6 +223,7 @@ class GoogleVision:
         res = texts[0].description if len(texts) > 0 else ''
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -240,7 +244,7 @@ class GoogleLens:
         logger.info('Google Lens ready')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -310,6 +314,7 @@ class GoogleLens:
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -318,9 +323,7 @@ class GoogleLens:
         aspect_ratio = img.width / img.height
         new_w = int(sqrt(3000000 * aspect_ratio))
         new_h = int(new_w / aspect_ratio)
-        img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-        img.close()
-        img = img_resized
+        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
         return (pil_image_to_bytes(img), img.width, img.height)
@@ -339,7 +342,7 @@ class GoogleLensWeb:
         logger.info('Google Lens (web) ready')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -406,6 +409,7 @@ class GoogleLensWeb:
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -414,9 +418,7 @@ class GoogleLensWeb:
         aspect_ratio = img.width / img.height
         new_w = int(sqrt(3000000 * aspect_ratio))
         new_h = int(new_w / aspect_ratio)
-        img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-        img.close()
-        img = img_resized
+        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
         return pil_image_to_bytes(img)
@@ -432,7 +434,7 @@ class Bing:
         logger.info('Bing ready')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -529,6 +531,7 @@ class Bing:
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -541,9 +544,7 @@ class Bing:
             resize_factor = max(max_pixel_size / img.width, max_pixel_size / img.height)
             new_w = int(img.width * resize_factor)
             new_h = int(img.height * resize_factor)
-            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-            img.close()
-            img = img_resized
+            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
         img_bytes, _ = limit_image_size(img, max_byte_size)
@@ -568,7 +569,7 @@ class AppleVision:
         logger.info('Apple Vision ready')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -593,6 +594,7 @@ class AppleVision:
         else:
             x = (False, 'Unknown error!')
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -647,7 +649,7 @@ class AppleLiveText:
         logger.info('Apple Live Text ready')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -704,7 +706,7 @@ class WinRTOCR:
            logger.warning('Error reading URL from config, WinRT OCR will not work!')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -726,6 +728,7 @@ class WinRTOCR:
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -761,7 +764,7 @@ class OneOCR:
            logger.warning('Error reading URL from config, OneOCR will not work!')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -785,6 +788,7 @@ class OneOCR:
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -810,7 +814,7 @@ class AzureImageAnalysis:
            logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -831,6 +835,7 @@ class AzureImageAnalysis:
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -839,9 +844,7 @@ class AzureImageAnalysis:
            resize_factor = max(50 / img.width, 50 / img.height)
            new_w = int(img.width * resize_factor)
            new_h = int(img.height * resize_factor)
-           img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-           img.close()
-           img = img_resized
+           img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
         return pil_image_to_bytes(img)
@@ -862,7 +865,7 @@ class EasyOCR:
         logger.info('EasyOCR ready')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -873,6 +876,7 @@ class EasyOCR:
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -908,7 +912,7 @@ class RapidOCR:
         logger.info('RapidOCR ready')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -920,6 +924,7 @@ class RapidOCR:
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
@@ -942,7 +947,7 @@ class OCRSpace:
            logger.warning('Error reading API key from config, OCRSpace will not work!')
     def __call__(self, img):
-        img = input_to_pil_image(img)
+        img, is_path = input_to_pil_image(img)
         if not img:
             return (False, 'Invalid image provided')
@@ -976,6 +981,7 @@ class OCRSpace:
         res = res['ParsedResults'][0]['ParsedText']
         x = (True, res)
-        img.close()
+        if is_path:
+            img.close()
         return x
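
The recurring pattern in the hunks above: input_to_pil_image now reports whether it opened the image itself (only the Path branch sets is_path), and each engine's __call__ closes the image only in that case, so a caller-supplied PIL.Image is never closed out from under the caller. The resize helpers follow the same rule, rebinding img to the resized copy without closing the original. A minimal sketch of that ownership rule, trimmed of the engine-specific code; run_engine is a hypothetical stand-in for the __call__ bodies, not part of owocr:

import io
from pathlib import Path

from PIL import Image


def input_to_pil_image(img):
    # is_path is True only when this function opened the file itself,
    # i.e. when it is responsible for closing the resulting image.
    is_path = False
    if isinstance(img, Image.Image):
        pil_image = img
    elif isinstance(img, (bytes, bytearray)):
        pil_image = Image.open(io.BytesIO(img))
    elif isinstance(img, Path):
        is_path = True
        pil_image = Image.open(img)
        pil_image.load()
    else:
        raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
    return pil_image, is_path


def run_engine(engine, img):
    # Hypothetical stand-in for the engines' __call__ methods above.
    pil_image, is_path = input_to_pil_image(img)
    result = (True, engine(pil_image))
    if is_path:
        # Close only what we opened; a caller-owned PIL.Image stays usable.
        pil_image.close()
    return result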

View File

@@ -364,13 +364,15 @@ class TextFiltering:
         return text, orig_text_filtered
-class ScreenshotClass:
-    def __init__(self):
+class ScreenshotThread(threading.Thread):
+    def __init__(self, screen_capture_on_combo):
+        super().__init__(daemon=True)
         screen_capture_area = config.get_general('screen_capture_area')
         self.macos_window_tracker_instance = None
         self.windows_window_tracker_instance = None
         self.screencapture_window_active = True
         self.screencapture_window_visible = True
+        self.use_periodic_queue = not screen_capture_on_combo
         if screen_capture_area == '':
             self.screencapture_mode = 0
         elif screen_capture_area.startswith('screen_'):
@@ -385,10 +387,10 @@ class ScreenshotClass:
            self.screencapture_mode = 2
         if self.screencapture_mode != 2:
-            self.sct = mss.mss()
+            sct = mss.mss()
             if self.screencapture_mode == 1:
-                mon = self.sct.monitors
+                mon = sct.monitors
                 if len(mon) <= screen_capture_monitor:
                     raise ValueError('Invalid monitor number in screen_capture_area')
                 coord_left = mon[screen_capture_monitor]['left']
@@ -468,12 +470,6 @@ class ScreenshotClass:
         else:
             raise ValueError('Window capture is only currently supported on Windows and macOS')
-    def __del__(self):
-        if self.macos_window_tracker_instance:
-            self.macos_window_tracker_instance.join()
-        elif self.windows_window_tracker_instance:
-            self.windows_window_tracker_instance.join()
     def get_windows_window_handle(self, window_title):
         def callback(hwnd, window_title_part):
             window_title = win32gui.GetWindowText(hwnd)
@@ -580,7 +576,16 @@ class ScreenshotClass:
         if not found:
             on_window_closed(False)
-    def __call__(self):
+    def write_result(self, result):
+        if self.use_periodic_queue:
+            periodic_screenshot_queue.put(result)
+        else:
+            image_queue.put((result, True))
+
+    def run(self):
+        if self.screencapture_mode != 2:
+            sct = mss.mss()
+        while screenshot_event.wait() and not terminated:
             if self.screencapture_mode == 2:
                 if sys.platform == 'darwin':
                     with objc.autorelease_pool():
@@ -593,7 +598,8 @@ class ScreenshotClass:
                        except queue.Empty:
                            cg_image = None
                        if not cg_image:
-                            return 0
+                            self.write_result(0)
+                            break
                        width = CGImageGetWidth(cg_image)
                        height = CGImageGetHeight(cg_image)
                        raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
@@ -618,7 +624,8 @@ class ScreenshotClass:
                        bmpinfo = save_bitmap.GetInfo()
                        bmpstr = save_bitmap.GetBitmapBits(True)
                    except pywintypes.error:
-                        return 0
+                        self.write_result(0)
+                        break
                    img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
                    try:
                        win32gui.DeleteObject(save_bitmap.GetHandle())
@@ -637,10 +644,16 @@ class ScreenshotClass:
                    except:
                        pass
             else:
-                sct_img = self.sct.grab(self.sct_params)
+                sct_img = sct.grab(self.sct_params)
                 img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
-            return img
+            self.write_result(img)
+            screenshot_event.clear()
+
+        if self.macos_window_tracker_instance:
+            self.macos_window_tracker_instance.join()
+        elif self.windows_window_tracker_instance:
+            self.windows_window_tracker_instance.join()
 class AutopauseTimer:
@@ -758,8 +771,7 @@ def on_window_closed(alive):
 def on_screenshot_combo():
     if not paused:
-        img = take_screenshot()
-        image_queue.put((img, True))
+        screenshot_event.set()
 def process_and_write_results(img_or_path, last_result, filtering, notify):
@@ -880,7 +892,8 @@ def run():
         websocket_server_thread = WebsocketServerThread('websocket' in (read_from, read_from_secondary))
         websocket_server_thread.start()
     if 'screencapture' in (read_from, read_from_secondary):
-        global take_screenshot
+        global screenshot_thread
+        global screenshot_event
         screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
         screen_capture_combo = config.get_general('screen_capture_combo')
         last_screenshot_time = 0
@@ -888,7 +901,12 @@ def run():
         if screen_capture_combo != '':
             screen_capture_on_combo = True
             key_combos[screen_capture_combo] = on_screenshot_combo
-        take_screenshot = ScreenshotClass()
+        else:
+            global periodic_screenshot_queue
+            periodic_screenshot_queue = queue.Queue()
+        screenshot_event = threading.Event()
+        screenshot_thread = ScreenshotThread(screen_capture_on_combo)
+        screenshot_thread.start()
         filtering = TextFiltering()
         read_from_readable.append('screen capture')
     if 'websocket' in (read_from, read_from_secondary):
@@ -951,8 +969,9 @@ def run():
                 pass
         if (not img) and process_screenshots:
-            if (not paused) and take_screenshot.screencapture_window_active and take_screenshot.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
-                img = take_screenshot()
+            if (not paused) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
+                screenshot_event.set()
+                img = periodic_screenshot_queue.get()
                 filter_img = True
                 notify = False
                 last_screenshot_time = time.time()
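
The structural change above turns the screenshot helper into a long-lived ScreenshotThread: the mss.mss() handle is now created inside run(), on the same thread that later calls grab(), and other code requests a capture by setting a threading.Event and reading the result from a queue instead of calling the object directly from another thread. Keeping all capture calls on one thread is what appears to address the Linux crashes, since the X11-backed mss handle is generally not safe to use from a thread other than the one that created it. A minimal, self-contained sketch of that event/queue hand-off (the names mirror the diff; window capture, regions, and the combo/periodic split are omitted):

import queue
import threading

import mss
from PIL import Image

screenshot_event = threading.Event()
periodic_screenshot_queue = queue.Queue()
terminated = False


class ScreenshotThread(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)

    def run(self):
        # Create the mss handle on the thread that uses it, so every
        # capture call happens on this single thread.
        sct = mss.mss()
        while screenshot_event.wait() and not terminated:
            sct_img = sct.grab(sct.monitors[1])
            img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
            periodic_screenshot_queue.put(img)
            screenshot_event.clear()


# Consumer side: wake the thread, then block until it delivers a frame.
ScreenshotThread().start()
screenshot_event.set()
frame = periodic_screenshot_queue.get()
print(frame.size)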

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "owocr"
-version = "1.14"
+version = "1.14.1"
 description = "Japanese OCR"
 readme = "README.md"
 requires-python = ">=3.11"