Make second pass multithreaded and refactor coordinate picker to be permanent if needed

This commit is contained in:
AuroraWright
2025-10-12 22:26:45 +02:00
parent 14066bdc95
commit 68626c44a2
3 changed files with 231 additions and 135 deletions

View File

@@ -200,6 +200,7 @@ class MangaOcr:
local = True
manual_language = False
coordinate_support = False
threading_support = True
def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}):
if 'manga_ocr' not in sys.modules:
@@ -233,6 +234,7 @@ class GoogleVision:
local = False
manual_language = False
coordinate_support = False
threading_support = True
def __init__(self):
if 'google.cloud' not in sys.modules:
@@ -280,6 +282,7 @@ class GoogleLens:
local = False
manual_language = False
coordinate_support = True
threading_support = True
def __init__(self):
if 'betterproto' not in sys.modules:
@@ -427,6 +430,7 @@ class GoogleLensWeb:
local = False
manual_language = False
coordinate_support = False
threading_support = True
def __init__(self):
if 'pyjson5' not in sys.modules:
@@ -524,6 +528,7 @@ class Bing:
local = False
manual_language = False
coordinate_support = True
threading_support = True
def __init__(self):
self.requests_session = requests.Session()
@@ -704,6 +709,7 @@ class AppleVision:
local = True
manual_language = True
coordinate_support = False
threading_support = True
def __init__(self, language='ja'):
if sys.platform != 'darwin':
@@ -756,6 +762,7 @@ class AppleLiveText:
local = True
manual_language = True
coordinate_support = True
threading_support = False
def __init__(self, language='ja'):
if sys.platform != 'darwin':
@@ -897,6 +904,7 @@ class WinRTOCR:
local = True
manual_language = True
coordinate_support = False
threading_support = True
def __init__(self, config={}, language='ja'):
if sys.platform == 'win32':
@@ -955,6 +963,7 @@ class OneOCR:
local = True
manual_language = False
coordinate_support = True
threading_support = True
def __init__(self, config={}):
if sys.platform == 'win32':
@@ -1079,6 +1088,7 @@ class AzureImageAnalysis:
local = False
manual_language = False
coordinate_support = False
threading_support = True
def __init__(self, config={}):
if 'azure.ai.vision.imageanalysis' not in sys.modules:
@@ -1135,6 +1145,7 @@ class EasyOCR:
local = True
manual_language = True
coordinate_support = False
threading_support = True
def __init__(self, config={'gpu': True}, language='ja'):
if 'easyocr' not in sys.modules:
@@ -1173,6 +1184,7 @@ class RapidOCR:
local = True
manual_language = True
coordinate_support = False
threading_support = True
def __init__(self, config={'high_accuracy_detection': False, 'high_accuracy_recognition': True}, language='ja'):
if 'rapidocr' not in sys.modules:
@@ -1238,6 +1250,7 @@ class OCRSpace:
local = False
manual_language = True
coordinate_support = False
threading_support = True
def __init__(self, config={}, language='ja'):
try:

View File

@@ -783,6 +783,7 @@ class ScreenshotThread(threading.Thread):
def __init__(self):
super().__init__(daemon=True)
screen_capture_area = config.get_general('screen_capture_area')
self.coordinate_selector_combo_enabled = config.get_general('coordinate_selector_combo') != ''
self.is_combo_screenshot = False
self.macos_window_tracker_instance = None
self.windows_window_tracker_instance = None
@@ -801,6 +802,9 @@ class ScreenshotThread(threading.Thread):
else:
self.screencapture_mode = 2
if self.coordinate_selector_combo_enabled:
self.launch_coordinate_picker(True, False)
if self.screencapture_mode != 2:
self.sct = mss.mss()
@@ -815,7 +819,7 @@ class ScreenshotThread(threading.Thread):
elif self.screencapture_mode == 3:
coord_left, coord_top, coord_width, coord_height = [int(c.strip()) for c in screen_capture_area.split(',')]
else:
self.launch_coordinate_picker(True)
self.launch_coordinate_picker(False, True)
if self.screencapture_mode != 0:
self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
@@ -881,7 +885,7 @@ class ScreenshotThread(threading.Thread):
logger.opt(ansi=True).info(f'Selected window coordinates: {x},{y},{x2},{y2}')
self.window_area_coordinates = (img.size, (x, y, x2, y2))
elif screen_capture_window_area == '':
self.launch_coordinate_picker(True)
self.launch_coordinate_picker(False, False)
else:
raise ValueError('"screen_capture_window_area" must be empty, "window" for the whole window, or a valid set of coordinates')
@@ -1066,10 +1070,14 @@ class ScreenshotThread(threading.Thread):
else:
periodic_screenshot_queue.put(result)
def launch_coordinate_picker(self, on_init):
def launch_coordinate_picker(self, init, must_return):
if init:
logger.opt(ansi=True).info('Preloading screen coordinate picker')
get_screen_selection(True, True)
return
if self.screencapture_mode != 2:
logger.opt(ansi=True).info('Launching screen coordinate picker')
screen_selection = get_screen_selection()
screen_selection = get_screen_selection(None, self.coordinate_selector_combo_enabled)
if not screen_selection:
if on_init:
raise ValueError('Picker window was closed or an error occurred')
@@ -1093,7 +1101,7 @@ class ScreenshotThread(threading.Thread):
self.window_area_coordinates = None
img = self.take_screenshot()
logger.opt(ansi=True).info('Launching window coordinate picker')
window_selection = get_screen_selection(img)
window_selection = get_screen_selection(img, self.coordinate_selector_combo_enabled)
if not window_selection:
logger.opt(ansi=True).warning('Picker window was closed or an error occurred, selecting whole window')
else:
@@ -1112,7 +1120,7 @@ class ScreenshotThread(threading.Thread):
while not terminated:
if not screenshot_event.wait(timeout=0.1):
if coordinate_selector_event.is_set():
self.launch_coordinate_picker(False)
self.launch_coordinate_picker(False, False)
coordinate_selector_event.clear()
continue
@@ -1130,33 +1138,77 @@ class ScreenshotThread(threading.Thread):
self.windows_window_tracker_instance.join()
class SecondPassThread:
    """Background worker that runs OCR engine calls off the caller's thread.

    Tasks are ``(img, engine_instance)`` pairs placed on ``input_queue``.
    Every processed task produces one ``(res, result_data, elapsed_seconds)``
    tuple on ``output_queue``, which callers poll with ``get_result()``.
    """

    def __init__(self):
        self.input_queue = queue.Queue()
        self.output_queue = queue.Queue()
        self.ocr_thread = None
        self.running = False

    def __del__(self):
        self.stop()

    def start(self):
        """Start the worker thread unless one is already alive."""
        if self.ocr_thread is None or not self.ocr_thread.is_alive():
            self.running = True
            self.ocr_thread = threading.Thread(target=self._process_ocr, daemon=True)
            self.ocr_thread.start()

    def stop(self):
        """Ask the worker loop to exit and wait for the thread to finish."""
        self.running = False
        if self.ocr_thread and self.ocr_thread.is_alive():
            self.ocr_thread.join()

    def _run_task(self, img, engine_instance):
        """Run one engine call and always publish a result for it.

        An exception raised by the engine is reported as a failed result
        (``res`` is ``False`` and ``result_data`` holds the error text)
        instead of propagating, because an uncaught exception would end the
        worker thread and the task would never yield a result.
        """
        start_time = time.time()
        try:
            res, result_data = engine_instance(img)
        except Exception as exc:
            res, result_data = False, f'{type(exc).__name__}: {exc}'
        self.output_queue.put((res, result_data, time.time() - start_time))

    def _process_ocr(self):
        """Worker loop: take tasks until stopped or the program terminates."""
        while self.running and not terminated:
            try:
                # Short timeout so the loop condition is re-checked regularly.
                img, engine_instance = self.input_queue.get(timeout=0.1)
            except queue.Empty:
                continue
            self._run_task(img, engine_instance)

    def submit_task(self, img, engine_instance):
        """Queue ``img`` to be processed by ``engine_instance``."""
        self.input_queue.put((img, engine_instance))

    def get_result(self):
        """Return the next finished result without blocking, or ``None``."""
        try:
            return self.output_queue.get_nowait()
        except queue.Empty:
            return None
class AutopauseTimer:
def __init__(self, timeout):
self.stop_event = threading.Event()
self.timeout = timeout
self.timer_thread = None
self.running = False
def __del__(self):
self.stop()
def start(self):
self.stop()
self.stop_event.clear()
self.running = True
self.timer_thread = threading.Thread(target=self._countdown)
self.timer_thread.start()
def stop(self):
if not self.stop_event.is_set() and self.timer_thread and self.timer_thread.is_alive():
self.stop_event.set()
if self.running and self.timer_thread and self.timer_thread.is_alive():
self.running = False
self.timer_thread.join()
def _countdown(self):
seconds = self.timeout
while seconds > 0 and not self.stop_event.is_set() and not terminated:
while seconds > 0 and self.running and not terminated:
time.sleep(1)
seconds -= 1
if not self.stop_event.is_set():
self.stop_event.set()
if self.running:
self.running = False
if not (paused or terminated):
pause_handler(True)
@@ -1164,6 +1216,10 @@ class AutopauseTimer:
class OutputResult:
def __init__(self):
self.filtering = TextFiltering()
self.second_pass_thread = SecondPassThread()
def __del__(self):
self.second_pass_thread.stop()
def _post_process(self, text, strip_spaces):
is_cj_text = self.filtering.cj_regex.search(''.join(text))
@@ -1195,6 +1251,7 @@ class OutputResult:
two_pass_processing_active = False
if filter_text and engine_index_2 != -1 and engine_index_2 != engine_index:
self.second_pass_thread.start()
engine_instance_2 = engine_instances[engine_index_2]
start_time = time.time()
res2, result_data_2 = engine_instance_2(img_or_path)
@@ -1212,12 +1269,22 @@ class OutputResult:
if output_format != 'json':
if changed_regions_image:
img_or_path = changed_regions_image
else:
return
start_time = time.time()
res, result_data = engine_instance(img_or_path)
end_time = time.time()
if engine_instance.threading_support:
self.second_pass_thread.submit_task(img_or_path, engine_instance)
else:
self.second_pass_thread.stop()
second_pass_result = self.second_pass_thread.get_result()
if second_pass_result:
res, result_data, processing_time = second_pass_result
two_pass_processing_active = True
elif two_pass_processing_active and engine_instance.threading_support:
return
else:
start_time = time.time()
res, result_data = engine_instance(img_or_path)
end_time = time.time()
if not res:
logger.opt(ansi=True).warning(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {result_data}')

View File

@@ -1,6 +1,12 @@
from multiprocessing import Process, Manager
import multiprocessing
import queue
import mss
from PIL import Image
import sys
try:
from AppKit import NSApplication, NSApplicationActivationPolicyAccessory
except ImportError:
pass
try:
from PIL import ImageTk
@@ -11,105 +17,20 @@ except:
class ScreenSelector:
def __init__(self, result, input_image=None):
def __init__(self, result_queue, command_queue):
self.sct = mss.mss()
self.monitors = self.sct.monitors[1:]
self.root = None
self.result = result
self.input_image = input_image
self.result_queue = result_queue
self.command_queue = command_queue
self.mac_init_done = False
def on_select(self, monitor, coordinates):
self.result['monitor'] = monitor
self.result['coordinates'] = coordinates
self.root.destroy()
self.result_queue.put({'monitor': monitor, 'coordinates': coordinates})
if self.root:
self.root.destroy()
def create_window_from_image(self, img):
original_width, original_height = img.size
display_monitor = None
for monitor in self.monitors:
if (monitor['width'] >= original_width and
monitor['height'] >= original_height):
display_monitor = monitor
break
if not display_monitor:
display_monitor = self.monitors[0]
window_width = min(original_width, display_monitor['width'])
window_height = min(original_height, display_monitor['height'])
left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2
top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2
window = tk.Toplevel(self.root)
window.geometry(f"{window_width}x{window_height}+{left}+{top}")
window.overrideredirect(1)
window.attributes('-topmost', 1)
# Resize image if it's larger than the window
if img.width > window_width or img.height > window_height:
img = img.resize((window_width, window_height), Image.Resampling.LANCZOS)
scale_x = original_width / window_width
scale_y = original_height / window_height
else:
scale_x = 1
scale_y = 1
img_tk = ImageTk.PhotoImage(img)
canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
canvas.pack(fill=tk.BOTH, expand=True)
canvas.image = img_tk
canvas.create_image(0, 0, image=img_tk, anchor=tk.NW)
start_x, start_y, rect = None, None, None
def on_click(event):
nonlocal start_x, start_y, rect
start_x, start_y = event.x, event.y
rect = canvas.create_rectangle(start_x, start_y, start_x, start_y, outline='red')
def on_drag(event):
nonlocal rect, start_x, start_y
if rect:
canvas.coords(rect, start_x, start_y, event.x, event.y)
def on_release(event):
nonlocal start_x, start_y, scale_x, scale_y
end_x, end_y = event.x, event.y
x1 = min(start_x, end_x)
y1 = min(start_y, end_y)
x2 = max(start_x, end_x)
y2 = max(start_y, end_y)
x1 = int(x1 * scale_x)
y1 = int(y1 * scale_y)
x2 = int(x2 * scale_x)
y2 = int(y2 * scale_y)
# Return None for monitor when using input image
self.on_select(None, (x1, y1, x2 - x1, y2 - y1))
canvas.bind('<ButtonPress-1>', on_click)
canvas.bind('<B1-Motion>', on_drag)
canvas.bind('<ButtonRelease-1>', on_release)
def create_window(self, monitor):
screenshot = self.sct.grab(monitor)
img = Image.frombytes('RGB', screenshot.size, screenshot.rgb)
if img.width != monitor['width']:
img = img.resize((monitor['width'], monitor['height']), Image.Resampling.LANCZOS)
window = tk.Toplevel(self.root)
window.geometry(f"{monitor['width']}x{monitor['height']}+{monitor['left']}+{monitor['top']}")
window.overrideredirect(1)
window.attributes('-topmost', 1)
img_tk = ImageTk.PhotoImage(img)
canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
def _setup_selection_canvas(self, canvas, img_tk, scale_x=1, scale_y=1, monitor=None):
canvas.pack(fill=tk.BOTH, expand=True)
canvas.image = img_tk
canvas.create_image(0, 0, image=img_tk, anchor=tk.NW)
@@ -133,7 +54,12 @@ class ScreenSelector:
x1 = min(start_x, end_x)
y1 = min(start_y, end_y)
x2 = max(start_x, end_x)
y2 = max(start_y, end_y)
y2 = max(start_y, end_y)
x1 = int(x1 * scale_x)
y1 = int(y1 * scale_y)
x2 = int(x2 * scale_x)
y2 = int(y2 * scale_y)
self.on_select(monitor, (x1, y1, x2 - x1, y2 - y1))
@@ -141,37 +67,127 @@ class ScreenSelector:
canvas.bind('<B1-Motion>', on_drag)
canvas.bind('<ButtonRelease-1>', on_release)
def start(self):
self.root = tk.Tk()
self.root.withdraw()
def _create_selection_window(self, img, geometry, scale_x=1, scale_y=1, monitor=None):
window = tk.Toplevel(self.root)
window.geometry(geometry)
window.overrideredirect(1)
window.attributes('-topmost', 1)
if self.input_image:
self.create_window_from_image(self.input_image)
img_tk = ImageTk.PhotoImage(img)
canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
self._setup_selection_canvas(canvas, img_tk, scale_x, scale_y, monitor)
def create_window_from_image(self, img):
original_width, original_height = img.size
display_monitor = None
for monitor in self.monitors:
if (monitor['width'] >= original_width and
monitor['height'] >= original_height):
display_monitor = monitor
break
if not display_monitor:
display_monitor = self.monitors[0]
window_width = min(original_width, display_monitor['width'])
window_height = min(original_height, display_monitor['height'])
left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2
top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2
geometry = f"{window_width}x{window_height}+{left}+{top}"
if img.width > window_width or img.height > window_height:
img = img.resize((window_width, window_height), Image.Resampling.LANCZOS)
scale_x = original_width / window_width
scale_y = original_height / window_height
else:
for monitor in self.monitors:
self.create_window(monitor)
scale_x = 1
scale_y = 1
self.root.mainloop()
self.root.update()
self._create_selection_window(img, geometry, scale_x, scale_y, None)
def create_window(self, monitor):
    """Open a selection window covering ``monitor`` with its screenshot.

    The monitor is captured through ``self.sct``. If the captured image
    width differs from the monitor's reported width, the image is resized to
    the monitor size, and the capture-to-display ratios are passed on as
    ``scale_x`` / ``scale_y``; otherwise both scales are 1.
    """
    grab = self.sct.grab(monitor)
    img = Image.frombytes('RGB', grab.size, grab.rgb)
    captured_width, captured_height = img.size
    target_width, target_height = monitor['width'], monitor['height']
    geometry = f"{target_width}x{target_height}+{monitor['left']}+{monitor['top']}"

    scale_x = scale_y = 1
    if captured_width != target_width:
        img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
        scale_x = captured_width / target_width
        scale_y = captured_height / target_height

    self._create_selection_window(img, geometry, scale_x, scale_y, monitor)
def start(self):
    """Serve picker commands from ``command_queue`` until told to stop.

    Each value read from the queue is handled as follows:

    * ``False`` -- leave the loop.
    * ``True``  -- call ``on_select(None, None)`` without opening a window.
    * ``None``  -- open one selection window per monitor.
    * anything else -- treated as an image and shown through
      ``create_window_from_image``.
    """
    while True:
        try:
            image = self.command_queue.get(timeout=0.1)
        except queue.Empty:
            continue

        # Identity checks: the sentinels are the bool singletons, and the
        # command may be an arbitrary image object whose ``==`` should not be
        # involved in dispatching.
        if image is False:
            break
        if image is True:
            self.on_select(None, None)
            continue

        self.root = tk.Tk()
        if not self.mac_init_done and sys.platform == 'darwin':
            # Done once per process, only on macOS.
            app = NSApplication.sharedApplication()
            app.setActivationPolicy_(NSApplicationActivationPolicyAccessory)
            self.mac_init_done = True
        self.root.withdraw()

        if image is not None:
            self.create_window_from_image(image)
        else:
            for monitor in self.monitors:
                self.create_window(monitor)

        self.root.mainloop()
        # NOTE(review): update() after mainloop() is kept exactly as in the
        # original flow -- presumably it flushes pending window teardown;
        # confirm it cannot raise once the root has been destroyed.
        self.root.update()
        self.root = None
def run_screen_selector(result_queue, command_queue):
    """Build a ScreenSelector on the two queues and run its command loop."""
    ScreenSelector(result_queue, command_queue).start()
selector_process = None
result_queue = None
command_queue = None
def get_screen_selection(pil_image, permanent_process):
    """Send a command to the picker process and wait for its answer.

    The picker process is created on first use, and again whenever the
    previous one is no longer alive.

    Args:
        pil_image: Value forwarded to the picker through ``command_queue``
            (an image, ``None``, or ``True`` as used by callers in this
            project).
        permanent_process: When false, the picker process is told to exit
            (``False`` is queued) and joined before returning; when true it
            is left running for later calls.

    Returns:
        The value the picker put on ``result_queue``, or ``False`` if the
        picker process ended without producing one.

    Raises:
        ValueError: If ``selector_available`` is false.
    """
    global selector_process, result_queue, command_queue

    if not selector_available:
        raise ValueError('tkinter or PIL with tkinter support are not installed, unable to open picker')

    if selector_process is None or not selector_process.is_alive():
        result_queue = multiprocessing.Queue()
        command_queue = multiprocessing.Queue()
        selector_process = multiprocessing.Process(target=run_screen_selector, args=(result_queue, command_queue))
        selector_process.daemon = True
        selector_process.start()

    command_queue.put(pil_image)

    result = False
    while (not result) and selector_process.is_alive():
        try:
            # Short poll (0.1s) so a dead picker process is noticed promptly.
            result = result_queue.get(timeout=0.1)
        except queue.Empty:
            # Only "nothing yet" is expected here; anything else (including
            # KeyboardInterrupt) must propagate instead of being swallowed.
            continue

    if not permanent_process:
        command_queue.put(False)
        selector_process.join()

    return result