Add window area selection, combo to re-select screen/window area at runtime

2025-10-12 09:07:02 +02:00
parent 6ada579b19
commit e262231a1d
3 changed files with 282 additions and 132 deletions
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -44,6 +44,8 @@ parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.S
                    help='Combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
 parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS,
                    help='Area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
 parser.add_argument('-swa', '--screen_capture_window_area', type=str, default=argparse.SUPPRESS,
                    help='If capturing with screen capture, subsection of the selected window. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "window" to use the whole window.')
 parser.add_argument('-sd', '--screen_capture_delay_secs', type=float, default=argparse.SUPPRESS,
                    help='Delay (in seconds) between screenshots when reading with screen capture. -1 to disable periodic screenshots.')
 parser.add_argument('-sw', '--screen_capture_only_active_windows', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
@@ -56,6 +58,8 @@ parser.add_argument('-sff', '--screen_capture_furigana_filter', type=str2bool, n
                    help="When reading with screen capture, try to filter furigana lines.")
 parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
                    help='When reading with screen capture, combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
 parser.add_argument('-scc', '--coordinate_selector_combo', type=str, default=argparse.SUPPRESS,
                    help='When reading with screen capture, combo to wait on for invoking the coordinate picker to change the screen/window area. Example value: "<ctrl>+<shift>+c". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
 parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS,
                    help='Two letter language code for filtering screencapture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).')
 parser.add_argument('-of', '--output_format', type=str, default=argparse.SUPPRESS,
@@ -89,12 +93,14 @@ class Config:
        'combo_pause': '',
        'combo_engine_switch': '',
        'screen_capture_area': '',
        'screen_capture_window_area': 'window',
        'screen_capture_delay_secs': 0,
        'screen_capture_only_active_windows': True,
        'screen_capture_frame_stabilization': -1,
        'screen_capture_line_recovery': True,
        'screen_capture_furigana_filter': True,
        'screen_capture_combo': '',
        'coordinate_selector_combo': '',
        'screen_capture_old_macos_api': False,
        'language': 'ja',
        'output_format': 'text',
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -48,7 +48,7 @@ try:
    from AppKit import NSData, NSImage, NSBitmapImageRep, NSDeviceRGBColorSpace, NSGraphicsContext, NSZeroPoint, NSZeroRect, NSCompositingOperationCopy
    from Quartz import CGWindowListCreateImageFromArray, kCGWindowImageBoundsIgnoreFraming, CGRectMake, CGRectNull, CGMainDisplayID, CGWindowListCopyWindowInfo, \
                       CGWindowListCreateDescriptionFromArray, kCGWindowListOptionOnScreenOnly, kCGWindowListExcludeDesktopElements, kCGWindowName, kCGNullWindowID, \
-                       CGImageGetWidth, CGImageGetHeight, CGDataProviderCopyData, CGImageGetDataProvider, CGImageGetBytesPerRow
+                       CGImageGetWidth, CGImageGetHeight, CGDataProviderCopyData, CGImageGetDataProvider, CGImageGetBytesPerRow, kCGWindowImageNominalResolution
    from ScreenCaptureKit import SCContentFilter, SCScreenshotManager, SCShareableContent, SCStreamConfiguration, SCCaptureResolutionBest
 except ImportError:
    pass
@@ -312,7 +312,7 @@ class TextFiltering:
        self.stable_frame_data = None
        self.last_frame_text = []
        self.last_last_frame_text = []
-        self.stable_frame_text = None
+        self.stable_frame_text = []
        self.processed_stable_frame = False
        self.frame_stabilization_timestamp = 0
        self.cj_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
@@ -388,12 +388,6 @@ class TextFiltering:
        return filtered_text
    def _find_changed_lines(self, pil_image, current_result):
        if (self.last_frame_data != [None, None] and (current_result.image_properties.width != self.last_frame_data[1].image_properties.width or
            current_result.image_properties.height != self.last_frame_data[1].image_properties.height)):
            self.stable_frame_data = None
            self.last_frame_data = [None, None]
            self.last_last_frame_data = [None, None]
        if self.frame_stabilization == 0:
            changed_lines = self._find_changed_lines_impl(current_result, self.last_frame_data[1])
            if changed_lines == None:
@@ -598,6 +592,11 @@ class TextFiltering:
                    self.recovered_lines_count -= 1
                    continue
            changed_line = current_result[i]
            if next_result != None:
                logger.opt(ansi=True).debug(f"<red>Recovered line: '{changed_line}'</red>")
            if current_lines_ocr:
                current_line_bbox = current_lines_ocr[i].bounding_box
                # Check if line contains only kana (no kanji)
@@ -642,11 +641,6 @@ class TextFiltering:
                    if is_furigana:
                        continue
            changed_line = current_result[i]
            if next_result != None:
                logger.opt(ansi=True).debug(f"<red>Recovered line: '{changed_line}'</red>")
            if first and len(current_text) > 3:
                first = False
                # For the first line, check if it contains the end of previous text
@@ -695,10 +689,6 @@ class TextFiltering:
        return current_line
    def _check_horizontal_overlap(self, bbox1, bbox2):
        """
        Calculate the horizontal overlap ratio between two bounding boxes.
        Returns a value between 0.0 (no overlap) and 1.0 (complete overlap).
        """
        # Calculate left and right boundaries for both boxes
        left1 = bbox1.center_x - bbox1.width / 2
        right1 = bbox1.center_x + bbox1.width / 2
@@ -790,6 +780,7 @@ class ScreenshotThread(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)
        screen_capture_area = config.get_general('screen_capture_area')
        self.is_combo_screenshot = False
        self.macos_window_tracker_instance = None
        self.windows_window_tracker_instance = None
        self.screencapture_window_active = True
@@ -821,27 +812,16 @@ class ScreenshotThread(threading.Thread):
            elif self.screencapture_mode == 3:
                coord_left, coord_top, coord_width, coord_height = [int(c.strip()) for c in screen_capture_area.split(',')]
            else:
-                logger.opt(ansi=True).info('Launching screen coordinate picker')
+                self.launch_coordinate_picker(True)
                screen_selection = get_screen_selection()
                if not screen_selection:
                    raise ValueError('Picker window was closed or an error occurred')
                screen_capture_monitor = screen_selection['monitor']
                x, y, coord_width, coord_height = screen_selection['coordinates']
                if coord_width > 0 and coord_height > 0:
                    coord_top = screen_capture_monitor['top'] + y
                    coord_left = screen_capture_monitor['left'] + x
                else:
                    logger.opt(ansi=True).info('Selection is empty, selecting whole screen')
                    coord_left = screen_capture_monitor['left']
                    coord_top = screen_capture_monitor['top']
                    coord_width = screen_capture_monitor['width']
                    coord_height = screen_capture_monitor['height']
-            self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
+            if self.screencapture_mode != 0:
-            logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
+                self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
                logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
        else:
            self.screen_capture_only_active_windows = config.get_general('screen_capture_only_active_windows')
            self.window_area_coordinates = None
            area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name'
            if sys.platform == 'darwin':
                if config.get_general('screen_capture_old_macos_api') or int(platform.mac_ver()[0].split('.')[0]) < 14:
                    self.old_macos_screenshot_api = True
@@ -890,7 +870,17 @@ class ScreenshotThread(threading.Thread):
                logger.opt(ansi=True).info(f'Selected window: {window_title}')
            else:
                raise ValueError('Window capture is only currently supported on Windows and macOS')
-        self.is_combo_screenshot = False
+
            screen_capture_window_area = config.get_general('screen_capture_window_area')
            if screen_capture_window_area != 'window':    
                if len(screen_capture_window_area.split(',')) == 4:
                    x, y, x2, y2 = [int(c.strip()) for c in screen_capture_window_area.split(',')]
                    logger.opt(ansi=True).info(f'Selected window coordinates: {x},{y},{x2},{y2}')
                    self.window_area_coordinates = (img.size, (x, y, x2, y2))
                elif screen_capture_window_area == '':
                    self.launch_coordinate_picker(True)
                else:
                    raise ValueError('"screen_capture_window_area" must be empty, "window" for the whole window, or a valid set of coordinates')
    def get_windows_window_handle(self, window_title):
        def callback(hwnd, window_title_part):
@@ -998,6 +988,74 @@ class ScreenshotThread(threading.Thread):
        if not found:
            on_window_closed(False)
    def take_screenshot(self):
        """Capture the configured window or screen region as a PIL image.

        In window mode (``screencapture_mode == 2``) the window is grabbed
        through the platform API and, when a window sub-area is selected,
        cropped to it. The sub-area is discarded with a warning if the
        captured image no longer has the size recorded with the selection.
        In every other mode the region in ``self.sct_params`` is grabbed
        with mss.

        Returns:
            The captured ``PIL.Image``, or ``None`` when the window could
            not be captured.
        """
        if self.screencapture_mode != 2:
            # Keep the mss handle on the instance: a handle created as a
            # local variable of another method is not reachable from here.
            sct = getattr(self, '_sct', None)
            if sct is None:
                sct = self._sct = mss.mss()
            sct_img = sct.grab(self.sct_params)
            return Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')

        if sys.platform == 'darwin':
            img = self._grab_macos_window_image()
        else:
            img = self._grab_windows_window_image()
        if img is None:
            return None

        if self.window_area_coordinates:
            selection_size, crop_box = self.window_area_coordinates
            if img.size != selection_size:
                # The crop box was picked against a differently sized image.
                self.window_area_coordinates = None
                logger.opt(ansi=True).warning('Window size changed, discarding area selection')
            else:
                img = img.crop(crop_box)
        return img

    def _grab_macos_window_image(self):
        """Grab the window ``self.window_id`` on macOS; return a PIL image or None."""
        with objc.autorelease_pool():
            if self.old_macos_screenshot_api:
                cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id], kCGWindowImageBoundsIgnoreFraming | kCGWindowImageNominalResolution)
            else:
                self.capture_macos_window_screenshot(self.window_id)
                try:
                    cg_image = self.screencapturekit_queue.get(timeout=0.5)
                except queue.Empty:
                    cg_image = None
            if not cg_image:
                return None
            width = CGImageGetWidth(cg_image)
            height = CGImageGetHeight(cg_image)
            raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
            bpr = CGImageGetBytesPerRow(cg_image)
        return Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)

    def _grab_windows_window_image(self):
        """Grab the window ``self.window_handle`` on Windows; return a PIL image or None."""
        hwnd_dc = mfc_dc = save_dc = save_bitmap = None
        try:
            coord_left, coord_top, right, bottom = win32gui.GetWindowRect(self.window_handle)
            coord_width = right - coord_left
            coord_height = bottom - coord_top
            hwnd_dc = win32gui.GetWindowDC(self.window_handle)
            mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
            save_dc = mfc_dc.CreateCompatibleDC()
            save_bitmap = win32ui.CreateBitmap()
            save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
            save_dc.SelectObject(save_bitmap)
            ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2)
            bmpinfo = save_bitmap.GetInfo()
            bmpstr = save_bitmap.GetBitmapBits(True)
            return Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
        except pywintypes.error:
            return None
        finally:
            # Always release whatever was acquired, including on the error
            # path, so repeated failures do not leak GDI handles.
            release_steps = (
                (save_bitmap, lambda: win32gui.DeleteObject(save_bitmap.GetHandle())),
                (save_dc, lambda: save_dc.DeleteDC()),
                (mfc_dc, lambda: mfc_dc.DeleteDC()),
                (hwnd_dc, lambda: win32gui.ReleaseDC(self.window_handle, hwnd_dc)),
            )
            for resource, release in release_steps:
                if resource is None:
                    continue
                try:
                    release()
                except Exception:
                    # Best-effort cleanup: a failed release must not mask
                    # the capture result.
                    pass
    def write_result(self, result):
        if self.is_combo_screenshot:
            self.is_combo_screenshot = False
@@ -1005,72 +1063,60 @@ class ScreenshotThread(threading.Thread):
        else:
            periodic_screenshot_queue.put(result)
    def launch_coordinate_picker(self, on_init):
        """Open the interactive picker and store the chosen capture area.

        In screen modes (``screencapture_mode != 2``) the selection is
        converted to absolute coordinates and stored in ``self.sct_params``;
        an empty selection selects the whole monitor. In window mode a
        screenshot of the window is shown and the selection is stored in
        ``self.window_area_coordinates`` as ``(image_size, (x, y, x2, y2))``;
        an empty selection or a closed picker selects the whole window.

        Args:
            on_init: When true, a closed or failed screen picker raises
                ``ValueError``; otherwise it only logs a warning and the
                current screen settings are kept.

        Raises:
            ValueError: The screen picker returned nothing and ``on_init``
                is true.
        """
        if self.screencapture_mode != 2:
            logger.opt(ansi=True).info('Launching screen coordinate picker')
            screen_selection = get_screen_selection()
            if not screen_selection:
                if on_init:
                    raise ValueError('Picker window was closed or an error occurred')
                logger.opt(ansi=True).warning('Picker window was closed or an error occurred, leaving settings unchanged')
                return
            screen_capture_monitor = screen_selection['monitor']
            x, y, coord_width, coord_height = screen_selection['coordinates']
            if coord_width > 0 and coord_height > 0:
                # Picker coordinates are relative to the monitor.
                coord_top = screen_capture_monitor['top'] + y
                coord_left = screen_capture_monitor['left'] + x
            else:
                logger.opt(ansi=True).info('Selection is empty, selecting whole screen')
                coord_left = screen_capture_monitor['left']
                coord_top = screen_capture_monitor['top']
                coord_width = screen_capture_monitor['width']
                coord_height = screen_capture_monitor['height']
            self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
            logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
            return

        previous_area = self.window_area_coordinates
        # Clear the crop first so the picker is shown the whole window.
        self.window_area_coordinates = None
        img = self.take_screenshot()
        if img is None:
            # Without an image the picker would fall back to the whole-screen
            # selector and there would be no image size to record.
            self.window_area_coordinates = previous_area
            logger.opt(ansi=True).warning('Unable to capture the window, leaving area selection unchanged')
            return
        logger.opt(ansi=True).info('Launching window coordinate picker')
        window_selection = get_screen_selection(img)
        if not window_selection:
            logger.opt(ansi=True).warning('Picker window was closed or an error occurred, selecting whole window')
            return
        x, y, coord_width, coord_height = window_selection['coordinates']
        if coord_width > 0 and coord_height > 0:
            x2 = x + coord_width
            y2 = y + coord_height
            logger.opt(ansi=True).info(f'Selected window coordinates: {x},{y},{x2},{y2}')
            # Remember the image size so a later resize can invalidate the box.
            self.window_area_coordinates = (img.size, (x, y, x2, y2))
        else:
            logger.opt(ansi=True).info('Selection is empty, selecting whole window')
    def run(self):
        if self.screencapture_mode != 2:
            sct = mss.mss()
        while not terminated:
            if not screenshot_event.wait(timeout=0.1):
                if coordinate_selector_event.is_set():
                    self.launch_coordinate_picker(False)
                    coordinate_selector_event.clear()
                continue
            if self.screencapture_mode == 2:
                if sys.platform == 'darwin':
                    with objc.autorelease_pool():
                        if self.old_macos_screenshot_api:
                            cg_image = CGWindowListCreateImageFromArray(CGRectNull, [self.window_id], kCGWindowImageBoundsIgnoreFraming)
                        else:
                            self.capture_macos_window_screenshot(self.window_id)
                            try:
                                cg_image = self.screencapturekit_queue.get(timeout=0.5)
                            except queue.Empty:
                                cg_image = None
                        if not cg_image:
                            self.write_result(0)
                            break
                        width = CGImageGetWidth(cg_image)
                        height = CGImageGetHeight(cg_image)
                        raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
                        bpr = CGImageGetBytesPerRow(cg_image)
                    img = Image.frombuffer('RGBA', (width, height), raw_data, 'raw', 'BGRA', bpr, 1)
                else:
                    try:
                        coord_left, coord_top, right, bottom = win32gui.GetWindowRect(self.window_handle)
                        coord_width = right - coord_left
                        coord_height = bottom - coord_top
-                        hwnd_dc = win32gui.GetWindowDC(self.window_handle)
+            img = self.take_screenshot()
-                        mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
+            if not img:
-                        save_dc = mfc_dc.CreateCompatibleDC()
+                self.write_result(0)
-
+                break
                        save_bitmap = win32ui.CreateBitmap()
                        save_bitmap.CreateCompatibleBitmap(mfc_dc, coord_width, coord_height)
                        save_dc.SelectObject(save_bitmap)
                        result = ctypes.windll.user32.PrintWindow(self.window_handle, save_dc.GetSafeHdc(), 2)
                        bmpinfo = save_bitmap.GetInfo()
                        bmpstr = save_bitmap.GetBitmapBits(True)
                    except pywintypes.error:
                        self.write_result(0)
                        break
                    img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
                    try:
                        win32gui.DeleteObject(save_bitmap.GetHandle())
                    except:
                        pass
                    try:
                        save_dc.DeleteDC()
                    except:
                        pass
                    try:
                        mfc_dc.DeleteDC()
                    except:
                        pass
                    try:
                        win32gui.ReleaseDC(self.window_handle, hwnd_dc)
                    except:
                        pass
            else:
                sct_img = sct.grab(self.sct_params)
                img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
            self.write_result(img)
            screenshot_event.clear()
@@ -1275,31 +1321,43 @@ def user_input_thread_run():
    if sys.platform == 'win32':
        import msvcrt
        while not terminated:
-            user_input_bytes = msvcrt.getch()
+            if coordinate_selector_event.is_set():
-            try:
+                while coordinate_selector_event.is_set():
-                user_input = user_input_bytes.decode()
+                    time.sleep(0.1)
-                if user_input.lower() in 'tq':
+            if msvcrt.kbhit():
-                    _terminate_handler()
+                try:
-                elif user_input.lower() == 'p':
+                    user_input_bytes = msvcrt.getch()
-                    pause_handler(False)
+                    user_input = user_input_bytes.decode()
-                else:
+                    if user_input.lower() in 'tq':
-                    engine_change_handler(user_input, False)
+                        _terminate_handler()
-            except UnicodeDecodeError:
+                    elif user_input.lower() == 'p':
-                pass
+                        pause_handler(False)
                    else:
                        engine_change_handler(user_input, False)
                except UnicodeDecodeError:
                    pass
            else:
                time.sleep(0.1)
    else:
-        import tty, termios
+        import tty, termios, select
        fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(fd)
        try:
-            tty.setcbreak(sys.stdin.fileno())
+            tty.setcbreak(fd)
            while not terminated:
-                user_input = sys.stdin.read(1)
+                if coordinate_selector_event.is_set():
-                if user_input.lower() in 'tq':
+                    while coordinate_selector_event.is_set():
-                    _terminate_handler()
+                        time.sleep(0.1)
-                elif user_input.lower() == 'p':
+                    tty.setcbreak(fd)
-                    pause_handler(False)
+                rlist, _, _ = select.select([sys.stdin], [], [], 0.1)
-                else:
+                if rlist:
-                    engine_change_handler(user_input, False)
+                    user_input = sys.stdin.read(1)
                    if user_input.lower() in 'tq':
                        _terminate_handler()
                    elif user_input.lower() == 'p':
                        pause_handler(False)
                    else:
                        engine_change_handler(user_input, False)
        finally:
            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
@@ -1322,6 +1380,10 @@ def on_screenshot_combo():
    screenshot_event.set()
 def on_coordinate_selector_combo():
    """Key-combo callback: set the event that asks for the coordinate picker to be reopened."""
    coordinate_selector_event.set()
 def run():
    logger_level = 'DEBUG' if config.get_general('uwu') else 'INFO'
    logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format'), 'level': logger_level}])
@@ -1379,6 +1441,7 @@ def run():
    global websocket_server_thread
    global screenshot_thread
    global image_queue
    global coordinate_selector_event
    non_path_inputs = ('screencapture', 'clipboard', 'websocket', 'unixsocket')
    read_from = config.get_general('read_from')
    read_from_secondary = config.get_general('read_from_secondary')
@@ -1403,6 +1466,7 @@ def run():
    combo_engine_switch = config.get_general('combo_engine_switch')
    screen_capture_periodic = False
    screen_capture_on_combo = False
    coordinate_selector_event = threading.Event()
    notifier = DesktopNotifierSync()
    image_queue = queue.Queue()
    key_combos = {}
@@ -1422,10 +1486,13 @@ def run():
        global screenshot_event
        screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
        screen_capture_combo = config.get_general('screen_capture_combo')
        coordinate_selector_combo = config.get_general('coordinate_selector_combo')
        last_screenshot_time = 0
        if screen_capture_combo != '':
            screen_capture_on_combo = True
            key_combos[screen_capture_combo] = on_screenshot_combo
        if coordinate_selector_combo != '':
            key_combos[coordinate_selector_combo] = on_coordinate_selector_combo
        if screen_capture_delay_secs != -1:
            global periodic_screenshot_queue
            periodic_screenshot_queue = queue.Queue()
@@ -1547,3 +1614,4 @@ def run():
        screenshot_thread.join()
    if key_combo_listener:
        key_combo_listener.stop()
    user_input_thread.join()
--- a/owocr/screen_coordinate_picker.py
+++ b/owocr/screen_coordinate_picker.py
@@ -11,17 +11,90 @@ except:
 class ScreenSelector:
-    def __init__(self, result):
+    def __init__(self, result, input_image=None):
        self.sct = mss.mss()
        self.monitors = self.sct.monitors[1:]
        self.root = None
        self.result = result
        self.input_image = input_image
    def on_select(self, monitor, coordinates):
        """Store the chosen monitor and coordinates in the result mapping, then close the picker."""
        selection = (('monitor', monitor), ('coordinates', coordinates))
        for key, value in selection:
            self.result[key] = value
        self.root.destroy()
    def create_window_from_image(self, img):
        """Show *img* in a borderless, topmost window and let the user drag a rectangle.

        The window is centred on the first monitor large enough to hold the
        image, or on the first monitor when none is; in the latter case the
        image is scaled down to fit. On mouse release the rectangle is
        converted back to coordinates of the original image, clamped to the
        image bounds, and reported through ``on_select`` as
        ``(x, y, width, height)`` with ``None`` as the monitor.

        Args:
            img: PIL image to display and select from.
        """
        original_width, original_height = img.size

        fallback_monitor = self.monitors[0]
        display_monitor = next(
            (m for m in self.monitors
             if m['width'] >= original_width and m['height'] >= original_height),
            fallback_monitor,
        )

        window_width = min(original_width, display_monitor['width'])
        window_height = min(original_height, display_monitor['height'])
        left = display_monitor['left'] + (display_monitor['width'] - window_width) // 2
        top = display_monitor['top'] + (display_monitor['height'] - window_height) // 2

        window = tk.Toplevel(self.root)
        window.geometry(f"{window_width}x{window_height}+{left}+{top}")
        window.overrideredirect(1)
        window.attributes('-topmost', 1)

        # Shrink the image when it does not fit, remembering the factors
        # needed to map canvas coordinates back onto the original image.
        if img.width > window_width or img.height > window_height:
            img = img.resize((window_width, window_height), Image.Resampling.LANCZOS)
            scale_x = original_width / window_width
            scale_y = original_height / window_height
        else:
            scale_x = 1
            scale_y = 1

        img_tk = ImageTk.PhotoImage(img)
        canvas = tk.Canvas(window, cursor='cross', highlightthickness=0)
        canvas.pack(fill=tk.BOTH, expand=True)
        # Keep a reference on the widget so the photo image is not collected.
        canvas.image = img_tk
        canvas.create_image(0, 0, image=img_tk, anchor=tk.NW)

        start_x, start_y, rect = None, None, None

        def clamp(value, upper):
            # The pointer can leave the canvas while the button is held.
            return max(0, min(value, upper))

        def on_click(event):
            nonlocal start_x, start_y, rect
            if rect is not None:
                canvas.delete(rect)
            start_x = clamp(event.x, window_width)
            start_y = clamp(event.y, window_height)
            rect = canvas.create_rectangle(start_x, start_y, start_x, start_y, outline='red')

        def on_drag(event):
            if rect is not None:
                canvas.coords(rect, start_x, start_y, event.x, event.y)

        def on_release(event):
            if start_x is None or start_y is None:
                # Release without a recorded press: nothing to report.
                return
            end_x = clamp(event.x, window_width)
            end_y = clamp(event.y, window_height)
            x1 = int(min(start_x, end_x) * scale_x)
            y1 = int(min(start_y, end_y) * scale_y)
            x2 = min(int(max(start_x, end_x) * scale_x), original_width)
            y2 = min(int(max(start_y, end_y) * scale_y), original_height)
            # No monitor applies when selecting inside a supplied image.
            self.on_select(None, (x1, y1, x2 - x1, y2 - y1))

        canvas.bind('<ButtonPress-1>', on_click)
        canvas.bind('<B1-Motion>', on_drag)
        canvas.bind('<ButtonRelease-1>', on_release)
    def create_window(self, monitor):
        screenshot = self.sct.grab(monitor)
        img = Image.frombytes('RGB', screenshot.size, screenshot.rgb)
@@ -72,25 +145,28 @@ class ScreenSelector:
        self.root = tk.Tk()
        self.root.withdraw()
-        for monitor in self.monitors:
+        if self.input_image:
-            self.create_window(monitor)
+            self.create_window_from_image(self.input_image)
        else:
            for monitor in self.monitors:
                self.create_window(monitor)
        self.root.mainloop()
        self.root.update()
-def run_screen_selector(result):
+def run_screen_selector(result, input_image=None):
-    selector = ScreenSelector(result)
+    selector = ScreenSelector(result, input_image)
    selector.start()
-def get_screen_selection():
+def get_screen_selection(pil_image = None):
    if not selector_available:
        raise ValueError('tkinter or PIL with tkinter support are not installed, unable to open picker')
    with Manager() as manager:
        res = manager.dict()
-        process = Process(target=run_screen_selector, args=(res,))
+        process = Process(target=run_screen_selector, args=(res, pil_image))
        process.start()    
        process.join()