Update readme, fix minor issues

This commit is contained in:
AuroraWright
2025-05-04 09:00:19 +02:00
parent e48f388755
commit f31526a339
4 changed files with 12 additions and 6 deletions

View File

@@ -15,6 +15,7 @@ Additionally:
- Supports reading images and/or writing text to a websocket with the `-r=websocket` and/or `-w=websocket` parameters (the port is 7331 by default, and is configurable in the config file) - Supports reading images and/or writing text to a websocket with the `-r=websocket` and/or `-w=websocket` parameters (the port is 7331 by default, and is configurable in the config file)
- On macOS and Linux, supports reading images from a Unix domain socket (`/tmp/owocr.sock`) with `-r=unixsocket` - On macOS and Linux, supports reading images from a Unix domain socket (`/tmp/owocr.sock`) with `-r=unixsocket`
- On Windows and macOS, supports capturing from the screen directly or from a specific window with `-r=screencapture`. By default it will open a coordinate picker so you can select an area of the screen and then read from it every 3 seconds, but you can change it to screenshot the whole screen, a manual set of coordinates `x,y,width,height` or just a specific window (with the window title). You can also change the delay between screenshots or specify a keyboard combo if you don't want screenshots to be taken periodically. Refer to the config file or to `owocr --help` for more details about the screen capture settings - On Windows and macOS, supports capturing from the screen directly or from a specific window with `-r=screencapture`. By default it will open a coordinate picker so you can select an area of the screen and then read from it every 3 seconds, but you can change it to screenshot the whole screen, a manual set of coordinates `x,y,width,height` or just a specific window (with the window title). You can also change the delay between screenshots or specify a keyboard combo if you don't want screenshots to be taken periodically. Refer to the config file or to `owocr --help` for more details about the screen capture settings
- You can read images from a second source at the same time with `-rs=`; it accepts the same arguments as `-r`
- You can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q" inside the terminal window - You can pause/unpause the image processing by pressing "p" or terminate the script with "t" or "q" inside the terminal window
- You can switch between OCR providers by pressing their corresponding keyboard key inside the terminal window (refer to the list of keys in the providers list below) - You can switch between OCR providers by pressing their corresponding keyboard key inside the terminal window (refer to the list of keys in the providers list below)
- You can start the script paused with the `-p` option or with a specific provider with the `-e` option (refer to `owocr -h` for the list) - You can start the script paused with the `-p` option or with a specific provider with the `-e` option (refer to `owocr -h` for the list)

View File

@@ -13,7 +13,7 @@ parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS, parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.') help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.SUPPRESS, parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.SUPPRESS,
help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.') help="Optional secondary source to read input images from. Same options as read_from, but they can't both be directory paths.")
parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS, parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.') help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS, parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
@@ -49,7 +49,7 @@ class Config:
__engine_config = {} __engine_config = {}
__default_config = { __default_config = {
'read_from': 'clipboard', 'read_from': 'clipboard',
'read_from_secondary': None, 'read_from_secondary': '',
'write_to': 'clipboard', 'write_to': 'clipboard',
'engine': '', 'engine': '',
'pause_at_startup': False, 'pause_at_startup': False,

View File

@@ -320,8 +320,9 @@ class GoogleLens:
new_h = int(new_w / aspect_ratio) new_h = int(new_w / aspect_ratio)
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img.close() img.close()
img = img_resized
return (pil_image_to_bytes(img_resized), img_resized.width, img_resized.height) return (pil_image_to_bytes(img), img.width, img.height)
class GoogleLensWeb: class GoogleLensWeb:
name = 'glensweb' name = 'glensweb'
@@ -415,8 +416,9 @@ class GoogleLensWeb:
new_h = int(new_w / aspect_ratio) new_h = int(new_w / aspect_ratio)
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img.close() img.close()
img = img_resized
return pil_image_to_bytes(img_resized) return pil_image_to_bytes(img)
class Bing: class Bing:
name = 'bing' name = 'bing'
@@ -541,8 +543,9 @@ class Bing:
new_h = int(img.height * resize_factor) new_h = int(img.height * resize_factor)
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img.close() img.close()
img = img_resized
img_bytes, _ = limit_image_size(img_resized, max_byte_size) img_bytes, _ = limit_image_size(img, max_byte_size)
if img_bytes: if img_bytes:
res = base64.b64encode(img_bytes).decode('utf-8') res = base64.b64encode(img_bytes).decode('utf-8')
@@ -838,8 +841,9 @@ class AzureImageAnalysis:
new_h = int(img.height * resize_factor) new_h = int(img.height * resize_factor)
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS) img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
img.close() img.close()
img = img_resized
return pil_image_to_bytes(img_resized) return pil_image_to_bytes(img)
class EasyOCR: class EasyOCR:
name = 'easyocr' name = 'easyocr'

View File

@@ -2,6 +2,7 @@
;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace ;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
;engine = glens ;engine = glens
;read_from = clipboard ;read_from = clipboard
;read_from_secondary =
;write_to = clipboard ;write_to = clipboard
;note: this specifies an amount of seconds to wait for auto pausing the program after a successful text recognition. Will be ignored when reading with screen capture. 0 to disable. ;note: this specifies an amount of seconds to wait for auto pausing the program after a successful text recognition. Will be ignored when reading with screen capture. 0 to disable.
;auto_pause = 0 ;auto_pause = 0