diff --git a/README.md b/README.md index 98d42ec..b0660d5 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Additionally: - Supports reading images and/or writing text to a websocket with the `-r=websocket` and/or `-w=websocket` parameters (the port is 7331 by default, and is configurable in the config file) - On macOS and Linux, supports reading images from a Unix domain socket (`/tmp/owocr.sock`) with `-r=unixsocket` - On Windows and macOS, supports capturing from the screen directly or from a specific window with `-r=screencapture`. By default it will open a coordinate picker so you can select an area of the screen and then read from it every 3 seconds, but you can change it to screenshot the whole screen, a manual set of coordinates `x,y,width,height` or just a specific window (with the window title). You can also change the delay between screenshots or specify a keyboard combo if you don't want screenshots to be taken periodically. Refer to the config file or to `owocr --help` for more details about the screen capture settings +- You can read images from another source at the same time with `-rs=`, which accepts the same arguments as `-r` - You can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q" inside the terminal window - You can switch between OCR providers pressing their corresponding keyboard key inside the terminal window (refer to the list of keys in the providers list below) - You can start the script paused with the `-p` option or with a specific provider with the `-e` option (refer to `owocr -h` for the list) diff --git a/owocr/config.py b/owocr/config.py index cd7f9ad..0391c5f 100644 --- a/owocr/config.py +++ b/owocr/config.py @@ -13,7 +13,7 @@ parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\ parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS, help='Where to read input images from. 
Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.') parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.SUPPRESS, - help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.') + help="Optional secondary source to read input images from. Same options as read_from, but they can't both be directory paths.") parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS, help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.') parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS, @@ -49,7 +49,7 @@ class Config: __engine_config = {} __default_config = { 'read_from': 'clipboard', - 'read_from_secondary': None, + 'read_from_secondary': '', 'write_to': 'clipboard', 'engine': '', 'pause_at_startup': False, diff --git a/owocr/ocr.py b/owocr/ocr.py index 0f1613c..fc9532e 100644 --- a/owocr/ocr.py +++ b/owocr/ocr.py @@ -320,8 +320,9 @@ class GoogleLens: new_h = int(new_w / aspect_ratio) img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) img.close() + img = img_resized - return (pil_image_to_bytes(img_resized), img_resized.width, img_resized.height) + return (pil_image_to_bytes(img), img.width, img.height) class GoogleLensWeb: name = 'glensweb' @@ -415,8 +416,9 @@ class GoogleLensWeb: new_h = int(new_w / aspect_ratio) img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) img.close() + img = img_resized - return pil_image_to_bytes(img_resized) + return pil_image_to_bytes(img) class Bing: name = 'bing' @@ -541,8 +543,9 @@ class Bing: new_h = int(img.height * resize_factor) img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) img.close() + img = img_resized - img_bytes, _ = limit_image_size(img_resized, max_byte_size) + img_bytes, _ = limit_image_size(img, max_byte_size) 
if img_bytes: res = base64.b64encode(img_bytes).decode('utf-8') @@ -838,8 +841,9 @@ class AzureImageAnalysis: new_h = int(img.height * resize_factor) img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) img.close() + img = img_resized - return pil_image_to_bytes(img_resized) + return pil_image_to_bytes(img) class EasyOCR: name = 'easyocr' diff --git a/owocr_config.ini b/owocr_config.ini index 1e11d5c..635bd58 100644 --- a/owocr_config.ini +++ b/owocr_config.ini @@ -2,6 +2,7 @@ ;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace ;engine = glens ;read_from = clipboard +;read_from_secondary = ;write_to = clipboard ;note: this specifies an amount of seconds to wait for auto pausing the program after a successful text recognition. Will be ignored when reading with screen capture. 0 to disable. ;auto_pause = 0