Update readme, fix minor issues

2025-05-04 09:00:19 +02:00
parent e48f388755
commit f31526a339
4 changed files with 12 additions and 6 deletions
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ Additionally:
 - Supports reading images and/or writing text to a websocket with the `-r=websocket` and/or `-w=websocket` parameters (the port is 7331 by default, and is configurable in the config file)
 - On macOS and Linux, supports reading images from a Unix domain socket (`/tmp/owocr.sock`) with `-r=unixsocket`
 - On Windows and macOS, supports capturing from the screen directly or from a specific window with `-r=screencapture`. By default it will open a coordinate picker so you can select an area of the screen and then read from it every 3 seconds, but you can change it to screenshot the whole screen, a manual set of coordinates `x,y,width,height` or just a specific window (with the window title). You can also change the delay between screenshots or specify a keyboard combo if you don't want screenshots to be taken periodically. Refer to the config file or to `owocr --help` for more details about the screen capture settings
 - You can read images from another source at the same time with `-rs=`; it accepts the same arguments as `-r`
 - You can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q" inside the terminal window
 - You can switch between OCR providers by pressing their corresponding keyboard key inside the terminal window (refer to the list of keys in the providers list below)
 - You can start the script paused with the `-p` option or with a specific provider with the `-e` option (refer to `owocr -h` for the list)
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -13,7 +13,7 @@ parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
 parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
                    help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
 parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.SUPPRESS,
-                    help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
+                    help="Optional secondary source to read input images from. Same options as read_from, but they can't both be directory paths.")
 parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
                    help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
 parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
@@ -49,7 +49,7 @@ class Config:
    __engine_config = {}
    __default_config = {
        'read_from': 'clipboard',
-        'read_from_secondary': None,
+        'read_from_secondary': '',
        'write_to': 'clipboard',
        'engine': '',
        'pause_at_startup': False,
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -320,8 +320,9 @@ class GoogleLens:
            new_h = int(new_w / aspect_ratio)
            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
            img.close()
            img = img_resized
-        return (pil_image_to_bytes(img_resized), img_resized.width, img_resized.height)
+        return (pil_image_to_bytes(img), img.width, img.height)
 class GoogleLensWeb:
    name = 'glensweb'
@@ -415,8 +416,9 @@ class GoogleLensWeb:
            new_h = int(new_w / aspect_ratio)
            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
            img.close()
            img = img_resized
-        return pil_image_to_bytes(img_resized)
+        return pil_image_to_bytes(img)
 class Bing:
    name = 'bing'
@@ -541,8 +543,9 @@ class Bing:
            new_h = int(img.height * resize_factor)
            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
            img.close()
            img = img_resized
-        img_bytes, _ = limit_image_size(img_resized, max_byte_size)
+        img_bytes, _ = limit_image_size(img, max_byte_size)
        if img_bytes:
            res = base64.b64encode(img_bytes).decode('utf-8')
@@ -838,8 +841,9 @@ class AzureImageAnalysis:
            new_h = int(img.height * resize_factor)
            img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
            img.close()
            img = img_resized
-        return pil_image_to_bytes(img_resized)
+        return pil_image_to_bytes(img)
 class EasyOCR:
    name = 'easyocr'
--- a/owocr_config.ini
+++ b/owocr_config.ini
@@ -2,6 +2,7 @@
 ;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
 ;engine = glens
 ;read_from = clipboard
 ;read_from_secondary =
 ;write_to = clipboard
 ;note: this specifies the number of seconds to wait before automatically pausing the program after a successful text recognition. It is ignored when reading with screen capture. Set to 0 to disable.
 ;auto_pause = 0