Make second pass multithreaded and refactor coordinate picker to be permanent if needed

This commit is contained in:
AuroraWright
2025-10-12 22:26:45 +02:00
parent 14066bdc95
commit 68626c44a2
3 changed files with 231 additions and 135 deletions

View File

@@ -200,6 +200,7 @@ class MangaOcr:
local = True local = True
manual_language = False manual_language = False
coordinate_support = False coordinate_support = False
threading_support = True
def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}): def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}):
if 'manga_ocr' not in sys.modules: if 'manga_ocr' not in sys.modules:
@@ -233,6 +234,7 @@ class GoogleVision:
local = False local = False
manual_language = False manual_language = False
coordinate_support = False coordinate_support = False
threading_support = True
def __init__(self): def __init__(self):
if 'google.cloud' not in sys.modules: if 'google.cloud' not in sys.modules:
@@ -280,6 +282,7 @@ class GoogleLens:
local = False local = False
manual_language = False manual_language = False
coordinate_support = True coordinate_support = True
threading_support = True
def __init__(self): def __init__(self):
if 'betterproto' not in sys.modules: if 'betterproto' not in sys.modules:
@@ -427,6 +430,7 @@ class GoogleLensWeb:
local = False local = False
manual_language = False manual_language = False
coordinate_support = False coordinate_support = False
threading_support = True
def __init__(self): def __init__(self):
if 'pyjson5' not in sys.modules: if 'pyjson5' not in sys.modules:
@@ -524,6 +528,7 @@ class Bing:
local = False local = False
manual_language = False manual_language = False
coordinate_support = True coordinate_support = True
threading_support = True
def __init__(self): def __init__(self):
self.requests_session = requests.Session() self.requests_session = requests.Session()
@@ -704,6 +709,7 @@ class AppleVision:
local = True local = True
manual_language = True manual_language = True
coordinate_support = False coordinate_support = False
threading_support = True
def __init__(self, language='ja'): def __init__(self, language='ja'):
if sys.platform != 'darwin': if sys.platform != 'darwin':
@@ -756,6 +762,7 @@ class AppleLiveText:
local = True local = True
manual_language = True manual_language = True
coordinate_support = True coordinate_support = True
threading_support = False
def __init__(self, language='ja'): def __init__(self, language='ja'):
if sys.platform != 'darwin': if sys.platform != 'darwin':
@@ -897,6 +904,7 @@ class WinRTOCR:
local = True local = True
manual_language = True manual_language = True
coordinate_support = False coordinate_support = False
threading_support = True
def __init__(self, config={}, language='ja'): def __init__(self, config={}, language='ja'):
if sys.platform == 'win32': if sys.platform == 'win32':
@@ -955,6 +963,7 @@ class OneOCR:
local = True local = True
manual_language = False manual_language = False
coordinate_support = True coordinate_support = True
threading_support = True
def __init__(self, config={}): def __init__(self, config={}):
if sys.platform == 'win32': if sys.platform == 'win32':
@@ -1079,6 +1088,7 @@ class AzureImageAnalysis:
local = False local = False
manual_language = False manual_language = False
coordinate_support = False coordinate_support = False
threading_support = True
def __init__(self, config={}): def __init__(self, config={}):
if 'azure.ai.vision.imageanalysis' not in sys.modules: if 'azure.ai.vision.imageanalysis' not in sys.modules:
@@ -1135,6 +1145,7 @@ class EasyOCR:
local = True local = True
manual_language = True manual_language = True
coordinate_support = False coordinate_support = False
threading_support = True
def __init__(self, config={'gpu': True}, language='ja'): def __init__(self, config={'gpu': True}, language='ja'):
if 'easyocr' not in sys.modules: if 'easyocr' not in sys.modules:
@@ -1173,6 +1184,7 @@ class RapidOCR:
local = True local = True
manual_language = True manual_language = True
coordinate_support = False coordinate_support = False
threading_support = True
def __init__(self, config={'high_accuracy_detection': False, 'high_accuracy_recognition': True}, language='ja'): def __init__(self, config={'high_accuracy_detection': False, 'high_accuracy_recognition': True}, language='ja'):
if 'rapidocr' not in sys.modules: if 'rapidocr' not in sys.modules:
@@ -1238,6 +1250,7 @@ class OCRSpace:
local = False local = False
manual_language = True manual_language = True
coordinate_support = False coordinate_support = False
threading_support = True
def __init__(self, config={}, language='ja'): def __init__(self, config={}, language='ja'):
try: try:

View File

@@ -783,6 +783,7 @@ class ScreenshotThread(threading.Thread):
def __init__(self): def __init__(self):
super().__init__(daemon=True) super().__init__(daemon=True)
screen_capture_area = config.get_general('screen_capture_area') screen_capture_area = config.get_general('screen_capture_area')
self.coordinate_selector_combo_enabled = config.get_general('coordinate_selector_combo') != ''
self.is_combo_screenshot = False self.is_combo_screenshot = False
self.macos_window_tracker_instance = None self.macos_window_tracker_instance = None
self.windows_window_tracker_instance = None self.windows_window_tracker_instance = None
@@ -801,6 +802,9 @@ class ScreenshotThread(threading.Thread):
else: else:
self.screencapture_mode = 2 self.screencapture_mode = 2
if self.coordinate_selector_combo_enabled:
self.launch_coordinate_picker(True, False)
if self.screencapture_mode != 2: if self.screencapture_mode != 2:
self.sct = mss.mss() self.sct = mss.mss()
@@ -815,7 +819,7 @@ class ScreenshotThread(threading.Thread):
elif self.screencapture_mode == 3: elif self.screencapture_mode == 3:
coord_left, coord_top, coord_width, coord_height = [int(c.strip()) for c in screen_capture_area.split(',')] coord_left, coord_top, coord_width, coord_height = [int(c.strip()) for c in screen_capture_area.split(',')]
else: else:
self.launch_coordinate_picker(True) self.launch_coordinate_picker(False, True)
if self.screencapture_mode != 0: if self.screencapture_mode != 0:
self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height} self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
@@ -881,7 +885,7 @@ class ScreenshotThread(threading.Thread):
logger.opt(ansi=True).info(f'Selected window coordinates: {x},{y},{x2},{y2}') logger.opt(ansi=True).info(f'Selected window coordinates: {x},{y},{x2},{y2}')
self.window_area_coordinates = (img.size, (x, y, x2, y2)) self.window_area_coordinates = (img.size, (x, y, x2, y2))
elif screen_capture_window_area == '': elif screen_capture_window_area == '':
self.launch_coordinate_picker(True) self.launch_coordinate_picker(False, False)
else: else:
raise ValueError('"screen_capture_window_area" must be empty, "window" for the whole window, or a valid set of coordinates') raise ValueError('"screen_capture_window_area" must be empty, "window" for the whole window, or a valid set of coordinates')
@@ -1066,10 +1070,14 @@ class ScreenshotThread(threading.Thread):
else: else:
periodic_screenshot_queue.put(result) periodic_screenshot_queue.put(result)
def launch_coordinate_picker(self, on_init): def launch_coordinate_picker(self, init, must_return):
if init:
logger.opt(ansi=True).info('Preloading screen coordinate picker')
get_screen_selection(True, True)
return
if self.screencapture_mode != 2: if self.screencapture_mode != 2:
logger.opt(ansi=True).info('Launching screen coordinate picker') logger.opt(ansi=True).info('Launching screen coordinate picker')
screen_selection = get_screen_selection() screen_selection = get_screen_selection(None, self.coordinate_selector_combo_enabled)
if not screen_selection: if not screen_selection:
if on_init: if on_init:
raise ValueError('Picker window was closed or an error occurred') raise ValueError('Picker window was closed or an error occurred')
@@ -1093,7 +1101,7 @@ class ScreenshotThread(threading.Thread):
self.window_area_coordinates = None self.window_area_coordinates = None
img = self.take_screenshot() img = self.take_screenshot()
logger.opt(ansi=True).info('Launching window coordinate picker') logger.opt(ansi=True).info('Launching window coordinate picker')
window_selection = get_screen_selection(img) window_selection = get_screen_selection(img, self.coordinate_selector_combo_enabled)
if not window_selection: if not window_selection:
logger.opt(ansi=True).warning('Picker window was closed or an error occurred, selecting whole window') logger.opt(ansi=True).warning('Picker window was closed or an error occurred, selecting whole window')
else: else:
@@ -1112,7 +1120,7 @@ class ScreenshotThread(threading.Thread):
while not terminated: while not terminated:
if not screenshot_event.wait(timeout=0.1): if not screenshot_event.wait(timeout=0.1):
if coordinate_selector_event.is_set(): if coordinate_selector_event.is_set():
self.launch_coordinate_picker(False) self.launch_coordinate_picker(False, False)
coordinate_selector_event.clear() coordinate_selector_event.clear()
continue continue
@@ -1130,33 +1138,77 @@ class ScreenshotThread(threading.Thread):
self.windows_window_tracker_instance.join() self.windows_window_tracker_instance.join()
class SecondPassThread:
    """Background worker that runs second-pass OCR off the caller's thread.

    Tasks are ``(img, engine_instance)`` pairs queued with ``submit_task``.
    Each finished task is published on the output queue as
    ``(res, result_data, elapsed_seconds)`` and can be collected without
    blocking through ``get_result``.
    """

    def __init__(self):
        self.input_queue = queue.Queue()
        self.output_queue = queue.Queue()
        self.ocr_thread = None
        self.running = False

    def __del__(self):
        self.stop()

    def start(self):
        """Spawn the worker thread unless one is already alive."""
        if self.ocr_thread is None or not self.ocr_thread.is_alive():
            self.running = True
            self.ocr_thread = threading.Thread(target=self._process_ocr, daemon=True)
            self.ocr_thread.start()

    def stop(self):
        """Ask the worker to exit and wait for it to finish.

        Safe to call repeatedly and before ``start``. When reached from the
        worker thread itself (for example through ``__del__``), the join is
        skipped because a thread cannot join itself.
        """
        self.running = False
        worker = getattr(self, 'ocr_thread', None)
        if worker and worker.is_alive() and worker is not threading.current_thread():
            worker.join()

    def _run_task(self, img, engine_instance):
        """Run one engine call and return ``(res, result_data, elapsed_seconds)``.

        An exception raised by the engine is converted into a failed result
        instead of killing the worker thread.
        """
        start_time = time.time()
        try:
            res, result_data = engine_instance(img)
        except Exception as exc:
            # NOTE(review): assumes consumers treat a falsy ``res`` as a
            # failure and ``result_data`` as its message -- confirm against
            # the code that reads get_result().
            res, result_data = False, f'{type(exc).__name__}: {exc}'
        return res, result_data, time.time() - start_time

    def _process_ocr(self):
        """Worker loop: drain the input queue until stopped or terminated."""
        while self.running and not terminated:
            # Only the queue read can legitimately raise queue.Empty, so the
            # try block is kept to that single call. The short timeout lets
            # the loop re-check the stop flags regularly.
            try:
                img, engine_instance = self.input_queue.get(timeout=0.1)
            except queue.Empty:
                continue
            self.output_queue.put(self._run_task(img, engine_instance))

    def submit_task(self, img, engine_instance):
        """Queue ``img`` for processing by ``engine_instance``."""
        self.input_queue.put((img, engine_instance))

    def get_result(self):
        """Return the oldest finished result, or ``None`` if none is ready."""
        try:
            return self.output_queue.get_nowait()
        except queue.Empty:
            return None
class AutopauseTimer: class AutopauseTimer:
def __init__(self, timeout): def __init__(self, timeout):
self.stop_event = threading.Event()
self.timeout = timeout self.timeout = timeout
self.timer_thread = None self.timer_thread = None
self.running = False
def __del__(self): def __del__(self):
self.stop() self.stop()
def start(self): def start(self):
self.stop() self.stop()
self.stop_event.clear() self.running = True
self.timer_thread = threading.Thread(target=self._countdown) self.timer_thread = threading.Thread(target=self._countdown)
self.timer_thread.start() self.timer_thread.start()
def stop(self): def stop(self):
if not self.stop_event.is_set() and self.timer_thread and self.timer_thread.is_alive(): if self.running and self.timer_thread and self.timer_thread.is_alive():
self.stop_event.set() self.running = False
self.timer_thread.join() self.timer_thread.join()
def _countdown(self): def _countdown(self):
seconds = self.timeout seconds = self.timeout
while seconds > 0 and not self.stop_event.is_set() and not terminated: while seconds > 0 and self.running and not terminated:
time.sleep(1) time.sleep(1)
seconds -= 1 seconds -= 1
if not self.stop_event.is_set(): if self.running:
self.stop_event.set() self.running = False
if not (paused or terminated): if not (paused or terminated):
pause_handler(True) pause_handler(True)
@@ -1164,6 +1216,10 @@ class AutopauseTimer:
class OutputResult: class OutputResult:
def __init__(self): def __init__(self):
self.filtering = TextFiltering() self.filtering = TextFiltering()
self.second_pass_thread = SecondPassThread()
def __del__(self):
self.second_pass_thread.stop()
def _post_process(self, text, strip_spaces): def _post_process(self, text, strip_spaces):
is_cj_text = self.filtering.cj_regex.search(''.join(text)) is_cj_text = self.filtering.cj_regex.search(''.join(text))
@@ -1195,6 +1251,7 @@ class OutputResult:
two_pass_processing_active = False two_pass_processing_active = False
if filter_text and engine_index_2 != -1 and engine_index_2 != engine_index: if filter_text and engine_index_2 != -1 and engine_index_2 != engine_index:
self.second_pass_thread.start()
engine_instance_2 = engine_instances[engine_index_2] engine_instance_2 = engine_instances[engine_index_2]
start_time = time.time() start_time = time.time()
res2, result_data_2 = engine_instance_2(img_or_path) res2, result_data_2 = engine_instance_2(img_or_path)
@@ -1212,9 +1269,19 @@ class OutputResult:
if output_format != 'json': if output_format != 'json':
if changed_regions_image: if changed_regions_image:
img_or_path = changed_regions_image img_or_path = changed_regions_image
else:
return
if engine_instance.threading_support:
self.second_pass_thread.submit_task(img_or_path, engine_instance)
else:
self.second_pass_thread.stop()
second_pass_result = self.second_pass_thread.get_result()
if second_pass_result:
res, result_data, processing_time = second_pass_result
two_pass_processing_active = True
elif two_pass_processing_active and engine_instance.threading_support:
return
else:
start_time = time.time() start_time = time.time()
res, result_data = engine_instance(img_or_path) res, result_data = engine_instance(img_or_path)
end_time = time.time() end_time = time.time()

View File

@@ -1,6 +1,12 @@
from multiprocessing import Process, Manager import multiprocessing
import queue
import mss import mss
from PIL import Image from PIL import Image
import sys
try:
from AppKit import NSApplication, NSApplicationActivationPolicyAccessory
except ImportError:
pass
try: try:
from PIL import ImageTk from PIL import ImageTk
@@ -11,105 +17,20 @@ except:
class ScreenSelector: class ScreenSelector:
def __init__(self, result, input_image=None): def __init__(self, result_queue, command_queue):
self.sct = mss.mss() self.sct = mss.mss()
self.monitors = self.sct.monitors[1:] self.monitors = self.sct.monitors[1:]
self.root = None self.root = None
self.result = result self.result_queue = result_queue
self.input_image = input_image self.command_queue = command_queue
self.mac_init_done = False
def on_select(self, monitor, coordinates): def on_select(self, monitor, coordinates):
self.result['monitor'] = monitor self.result_queue.put({'monitor': monitor, 'coordinates': coordinates})
self.result['coordinates'] = coordinates if self.root:
self.root.destroy() self.root.destroy()
def create_window_from_image(self, img): def _setup_selection_canvas(self, canvas, img_tk, scale_x=1, scale_y=1, monitor=None):
original_width, original_height = img.size
display_monitor = None
for monitor in self.monitors:
if (monitor['width'] >= original_width and
monitor['height'] >= original_height):
display_monitor = monitor
break
if not display_monitor:
display_monitor = self.monitors[0]
window_width = min(original_width, display_monitor['width'])
window_height = min(original_height, display_monitor['height'])
left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2
top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2
window = tk.Toplevel(self.root)
window.geometry(f"{window_width}x{window_height}+{left}+{top}")
window.overrideredirect(1)
window.attributes('-topmost', 1)
# Resize image if it's larger than the window
if img.width > window_width or img.height > window_height:
img = img.resize((window_width, window_height), Image.Resampling.LANCZOS)
scale_x = original_width / window_width
scale_y = original_height / window_height
else:
scale_x = 1
scale_y = 1
img_tk = ImageTk.PhotoImage(img)
canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
canvas.pack(fill=tk.BOTH, expand=True)
canvas.image = img_tk
canvas.create_image(0, 0, image=img_tk, anchor=tk.NW)
start_x, start_y, rect = None, None, None
def on_click(event):
nonlocal start_x, start_y, rect
start_x, start_y = event.x, event.y
rect = canvas.create_rectangle(start_x, start_y, start_x, start_y, outline='red')
def on_drag(event):
nonlocal rect, start_x, start_y
if rect:
canvas.coords(rect, start_x, start_y, event.x, event.y)
def on_release(event):
nonlocal start_x, start_y, scale_x, scale_y
end_x, end_y = event.x, event.y
x1 = min(start_x, end_x)
y1 = min(start_y, end_y)
x2 = max(start_x, end_x)
y2 = max(start_y, end_y)
x1 = int(x1 * scale_x)
y1 = int(y1 * scale_y)
x2 = int(x2 * scale_x)
y2 = int(y2 * scale_y)
# Return None for monitor when using input image
self.on_select(None, (x1, y1, x2 - x1, y2 - y1))
canvas.bind('<ButtonPress-1>', on_click)
canvas.bind('<B1-Motion>', on_drag)
canvas.bind('<ButtonRelease-1>', on_release)
def create_window(self, monitor):
screenshot = self.sct.grab(monitor)
img = Image.frombytes('RGB', screenshot.size, screenshot.rgb)
if img.width != monitor['width']:
img = img.resize((monitor['width'], monitor['height']), Image.Resampling.LANCZOS)
window = tk.Toplevel(self.root)
window.geometry(f"{monitor['width']}x{monitor['height']}+{monitor['left']}+{monitor['top']}")
window.overrideredirect(1)
window.attributes('-topmost', 1)
img_tk = ImageTk.PhotoImage(img)
canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
canvas.pack(fill=tk.BOTH, expand=True) canvas.pack(fill=tk.BOTH, expand=True)
canvas.image = img_tk canvas.image = img_tk
canvas.create_image(0, 0, image=img_tk, anchor=tk.NW) canvas.create_image(0, 0, image=img_tk, anchor=tk.NW)
@@ -135,43 +56,138 @@ class ScreenSelector:
x2 = max(start_x, end_x) x2 = max(start_x, end_x)
y2 = max(start_y, end_y) y2 = max(start_y, end_y)
x1 = int(x1 * scale_x)
y1 = int(y1 * scale_y)
x2 = int(x2 * scale_x)
y2 = int(y2 * scale_y)
self.on_select(monitor, (x1, y1, x2 - x1, y2 - y1)) self.on_select(monitor, (x1, y1, x2 - x1, y2 - y1))
canvas.bind('<ButtonPress-1>', on_click) canvas.bind('<ButtonPress-1>', on_click)
canvas.bind('<B1-Motion>', on_drag) canvas.bind('<B1-Motion>', on_drag)
canvas.bind('<ButtonRelease-1>', on_release) canvas.bind('<ButtonRelease-1>', on_release)
def _create_selection_window(self, img, geometry, scale_x=1, scale_y=1, monitor=None):
    """Open a borderless, always-on-top window showing ``img`` for selection.

    ``geometry`` is a Tk geometry string that sizes and places the window.
    ``scale_x``, ``scale_y`` and ``monitor`` are passed through unchanged to
    ``_setup_selection_canvas``, which draws the image and wires the mouse
    handlers.
    """
    overlay = tk.Toplevel(self.root)
    overlay.geometry(geometry)
    overlay.overrideredirect(1)  # no title bar or borders
    overlay.attributes('-topmost', 1)

    photo = ImageTk.PhotoImage(img)
    selection_canvas = tk.Canvas(overlay, cursor='cross', highlightthickness=0)
    self._setup_selection_canvas(selection_canvas, photo, scale_x, scale_y, monitor)
def create_window_from_image(self, img):
original_width, original_height = img.size
display_monitor = None
for monitor in self.monitors:
if (monitor['width'] >= original_width and
monitor['height'] >= original_height):
display_monitor = monitor
break
if not display_monitor:
display_monitor = self.monitors[0]
window_width = min(original_width, display_monitor['width'])
window_height = min(original_height, display_monitor['height'])
left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2
top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2
geometry = f"{window_width}x{window_height}+{left}+{top}"
if img.width > window_width or img.height > window_height:
img = img.resize((window_width, window_height), Image.Resampling.LANCZOS)
scale_x = original_width / window_width
scale_y = original_height / window_height
else:
scale_x = 1
scale_y = 1
self._create_selection_window(img, geometry, scale_x, scale_y, None)
def create_window(self, monitor):
    """Grab ``monitor`` and open a full-monitor selection window over it.

    If the captured image's width differs from the monitor's reported width,
    the image is resized to the monitor's dimensions and the capture-to-monitor
    ratios are passed on as scale factors.
    """
    shot = self.sct.grab(monitor)
    img = Image.frombytes('RGB', shot.size, shot.rgb)
    captured_w, captured_h = img.size

    mon_w = monitor['width']
    mon_h = monitor['height']
    geometry = f"{mon_w}x{mon_h}+{monitor['left']}+{monitor['top']}"

    scale_x = 1
    scale_y = 1
    if img.width != mon_w:
        img = img.resize((mon_w, mon_h), Image.Resampling.LANCZOS)
        scale_x = captured_w / mon_w
        scale_y = captured_h / mon_h

    self._create_selection_window(img, geometry, scale_x, scale_y, monitor)
def start(self): def start(self):
while True:
try:
image = self.command_queue.get(timeout=0.1)
except queue.Empty:
continue
if image == False:
break
if image == True:
self.on_select(None, None)
continue
self.root = tk.Tk() self.root = tk.Tk()
if not self.mac_init_done and sys.platform == 'darwin':
app = NSApplication.sharedApplication()
app.setActivationPolicy_(NSApplicationActivationPolicyAccessory)
self.mac_init_done = True
self.root.withdraw() self.root.withdraw()
if self.input_image: if image:
self.create_window_from_image(self.input_image) self.create_window_from_image(image)
else: else:
for monitor in self.monitors: for monitor in self.monitors:
self.create_window(monitor) self.create_window(monitor)
self.root.mainloop() self.root.mainloop()
self.root.update() self.root.update()
self.root = None
def run_screen_selector(result, input_image=None): def run_screen_selector(result_queue, command_queue):
selector = ScreenSelector(result, input_image) selector = ScreenSelector(result_queue, command_queue)
selector.start() selector.start()
selector_process = None
result_queue = None
command_queue = None
def get_screen_selection(pil_image=None, permanent_process=False):
    """Ask the selector process for a selection and return its answer.

    The selector process is started on demand and reused while it is alive.

    Args:
        pil_image: Command sent to the selector process. As handled by
            ``ScreenSelector.start``: an image opens the picker on that
            image, ``None`` opens the picker on every monitor, and ``True``
            only makes the process answer immediately (used for preloading).
        permanent_process: When true the selector process is left running
            for later calls; otherwise it is told to exit and joined.

    Returns:
        The dict posted by the selector (keys ``'monitor'`` and
        ``'coordinates'``), or ``False`` if the process exited before
        producing one.

    Raises:
        ValueError: If tkinter / PIL tkinter support is unavailable.
    """
    global selector_process, result_queue, command_queue

    if not selector_available:
        raise ValueError('tkinter or PIL with tkinter support are not installed, unable to open picker')

    if selector_process is None or not selector_process.is_alive():
        result_queue = multiprocessing.Queue()
        command_queue = multiprocessing.Queue()
        selector_process = multiprocessing.Process(target=run_screen_selector, args=(result_queue, command_queue))
        selector_process.daemon = True
        selector_process.start()

    command_queue.put(pil_image)

    result = False
    while (not result) and selector_process.is_alive():
        # Poll in short slices so a dead selector process is noticed quickly
        # instead of blocking forever on the queue.
        try:
            result = result_queue.get(timeout=0.1)
        except queue.Empty:
            continue

    if not permanent_process:
        command_queue.put(False)
        selector_process.join()

    return result