Add verbosity option, fix regression with separation of lines/paragraphs in terminal text output with coordinate-enabled engines
This commit is contained in:
@@ -50,8 +50,10 @@ parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.
|
|||||||
help='When reading with screen capture, combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
|
help='When reading with screen capture, combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
|
||||||
parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS,
|
parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS,
|
||||||
help='Two letter language code for filtering screencapture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).')
|
help='Two letter language code for filtering screencapture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).')
|
||||||
parser.add_argument('-of', '--output_format', type=str, default=argparse.SUPPRESS, choices=['text', 'json'],
|
parser.add_argument('-of', '--output_format', type=str, default=argparse.SUPPRESS,
|
||||||
help='The output format for OCR results. Can be "text" (default) or "json" (to include coordinates).')
|
help='The output format for OCR results. Can be "text" (default) or "json" (to include coordinates).')
|
||||||
|
parser.add_argument('-v', '--verbosity', type=int, default=argparse.SUPPRESS,
|
||||||
|
help='Terminal window verbosity. Can be -2 (all recognized text is showed whole, default), -1 (only timestamps are shown), 0 (nothing is shown but errors), or larger than 0 to cut displayed text to that amount of characters.')
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
has_config = False
|
has_config = False
|
||||||
@@ -82,7 +84,8 @@ class Config:
|
|||||||
'screen_capture_combo': '',
|
'screen_capture_combo': '',
|
||||||
'screen_capture_old_macos_api': False,
|
'screen_capture_old_macos_api': False,
|
||||||
'language': 'ja',
|
'language': 'ja',
|
||||||
'output_format': 'text'
|
'output_format': 'text',
|
||||||
|
'verbosity': -2
|
||||||
}
|
}
|
||||||
|
|
||||||
def __parse(self, value):
|
def __parse(self, value):
|
||||||
|
|||||||
@@ -85,6 +85,7 @@ try:
|
|||||||
except:
|
except:
|
||||||
optimized_png_encode = False
|
optimized_png_encode = False
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BoundingBox:
|
class BoundingBox:
|
||||||
"""
|
"""
|
||||||
@@ -133,7 +134,6 @@ class OcrResult:
|
|||||||
def empty_post_process(text):
|
def empty_post_process(text):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def post_process(text):
|
def post_process(text):
|
||||||
text = ' '.join([''.join(i.split()) for i in text.splitlines()])
|
text = ' '.join([''.join(i.split()) for i in text.splitlines()])
|
||||||
text = text.replace('…', '...')
|
text = text.replace('…', '...')
|
||||||
@@ -141,7 +141,6 @@ def post_process(text):
|
|||||||
text = jaconv.h2z(text, ascii=True, digit=True)
|
text = jaconv.h2z(text, ascii=True, digit=True)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def input_to_pil_image(img):
|
def input_to_pil_image(img):
|
||||||
is_path = False
|
is_path = False
|
||||||
if isinstance(img, Image.Image):
|
if isinstance(img, Image.Image):
|
||||||
@@ -159,7 +158,6 @@ def input_to_pil_image(img):
|
|||||||
raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
|
raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
|
||||||
return pil_image, is_path
|
return pil_image, is_path
|
||||||
|
|
||||||
|
|
||||||
def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
|
def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
|
||||||
if img_format == 'png' and optimized_png_encode and not optimize:
|
if img_format == 'png' and optimized_png_encode and not optimize:
|
||||||
raw_data = img.convert('RGBA').tobytes()
|
raw_data = img.convert('RGBA').tobytes()
|
||||||
@@ -172,11 +170,9 @@ def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80
|
|||||||
image_bytes = image_bytes.getvalue()
|
image_bytes = image_bytes.getvalue()
|
||||||
return image_bytes
|
return image_bytes
|
||||||
|
|
||||||
|
|
||||||
def pil_image_to_numpy_array(img):
|
def pil_image_to_numpy_array(img):
|
||||||
return np.array(img.convert('RGBA'))
|
return np.array(img.convert('RGBA'))
|
||||||
|
|
||||||
|
|
||||||
def limit_image_size(img, max_size):
|
def limit_image_size(img, max_size):
|
||||||
img_bytes = pil_image_to_bytes(img)
|
img_bytes = pil_image_to_bytes(img)
|
||||||
if len(img_bytes) <= max_size:
|
if len(img_bytes) <= max_size:
|
||||||
@@ -745,7 +741,6 @@ class AppleVision:
|
|||||||
def _preprocess(self, img):
|
def _preprocess(self, img):
|
||||||
return pil_image_to_bytes(img, 'tiff')
|
return pil_image_to_bytes(img, 'tiff')
|
||||||
|
|
||||||
|
|
||||||
class AppleLiveText:
|
class AppleLiveText:
|
||||||
name = 'alivetext'
|
name = 'alivetext'
|
||||||
readable_name = 'Apple Live Text'
|
readable_name = 'Apple Live Text'
|
||||||
@@ -883,7 +878,6 @@ class AppleLiveText:
|
|||||||
ns_image = NSImage.alloc().initWithData_(ns_data)
|
ns_image = NSImage.alloc().initWithData_(ns_data)
|
||||||
return ns_image
|
return ns_image
|
||||||
|
|
||||||
|
|
||||||
class WinRTOCR:
|
class WinRTOCR:
|
||||||
name = 'winrtocr'
|
name = 'winrtocr'
|
||||||
readable_name = 'WinRT OCR'
|
readable_name = 'WinRT OCR'
|
||||||
|
|||||||
12
owocr/run.py
12
owocr/run.py
@@ -825,6 +825,7 @@ def process_and_write_results(img_or_path, last_result, filtering, notify):
|
|||||||
return orig_text
|
return orig_text
|
||||||
|
|
||||||
output_format = config.get_general('output_format')
|
output_format = config.get_general('output_format')
|
||||||
|
verbosity = config.get_general('verbosity')
|
||||||
output_string = ''
|
output_string = ''
|
||||||
log_message = ''
|
log_message = ''
|
||||||
|
|
||||||
@@ -838,6 +839,7 @@ def process_and_write_results(img_or_path, last_result, filtering, notify):
|
|||||||
full_text_parts.append(w.text)
|
full_text_parts.append(w.text)
|
||||||
if w.separator:
|
if w.separator:
|
||||||
full_text_parts.append(w.separator)
|
full_text_parts.append(w.separator)
|
||||||
|
full_text_parts.append('\n')
|
||||||
unprocessed_text = "".join(full_text_parts)
|
unprocessed_text = "".join(full_text_parts)
|
||||||
|
|
||||||
if output_format == 'json':
|
if output_format == 'json':
|
||||||
@@ -862,7 +864,15 @@ def process_and_write_results(img_or_path, last_result, filtering, notify):
|
|||||||
output_string = post_process(unprocessed_text)
|
output_string = post_process(unprocessed_text)
|
||||||
log_message = output_string
|
log_message = output_string
|
||||||
|
|
||||||
logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {log_message}')
|
if verbosity != 0:
|
||||||
|
if verbosity < -1:
|
||||||
|
log_message_terminal = ': ' + log_message
|
||||||
|
elif verbosity == -1:
|
||||||
|
log_message_terminal = ''
|
||||||
|
else:
|
||||||
|
log_message_terminal = ': ' + (log_message if len(log_message) <= verbosity else log_message[:verbosity] + '[...]')
|
||||||
|
|
||||||
|
logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>{log_message_terminal}')
|
||||||
|
|
||||||
if notify and config.get_general('notifications'):
|
if notify and config.get_general('notifications'):
|
||||||
notifier.send(title='owocr', message='Text recognized: ' + log_message, urgency=get_notification_urgency())
|
notifier.send(title='owocr', message='Text recognized: ' + log_message, urgency=get_notification_urgency())
|
||||||
|
|||||||
@@ -14,6 +14,8 @@
|
|||||||
;notifications = False
|
;notifications = False
|
||||||
;ignore_flag = False
|
;ignore_flag = False
|
||||||
;delete_images = False
|
;delete_images = False
|
||||||
|
;note: terminal window verbosity. Can be -2 (all recognized text is shown in full), -1 (only timestamps are shown), 0 (nothing is shown but errors), or larger than 0 to cut displayed text to that amount of characters.
|
||||||
|
;verbosity = -2
|
||||||
;note: this specifies a combo to wait on for pausing the program. As an example: <ctrl>+<shift>+p. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
|
;note: this specifies a combo to wait on for pausing the program. As an example: <ctrl>+<shift>+p. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
|
||||||
;combo_pause = <ctrl>+<shift>+p
|
;combo_pause = <ctrl>+<shift>+p
|
||||||
;note: this specifies a combo to wait on for switching the OCR engine. As an example: <ctrl>+<shift>+a. To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
|
;note: this specifies a combo to wait on for switching the OCR engine. As an example: <ctrl>+<shift>+a. To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
|
||||||
@@ -29,10 +31,11 @@
|
|||||||
;note: this specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
|
;note: this specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
|
||||||
;screen_capture_combo = <ctrl>+<shift>+s
|
;screen_capture_combo = <ctrl>+<shift>+s
|
||||||
;screen_capture_old_macos_api = False
|
;screen_capture_old_macos_api = False
|
||||||
;language = ja
|
;note: this specifies the language to use for text filtering while using "screencapture". Valid values: ja: (Japanese) zh: (Chinese) ko: (Korean) ar: (Arabic) ru: (Russian) el: (Greek) he: (Hebrew) th: (Thai)
|
||||||
;language = zh
|
|
||||||
;note: This specifies the language to use for text filtering while using "screencapture". Valid values: ja: (Japanese) zh: (Chinese) ko: (Korean) ar: (Arabic) ru: (Russian) el: (Greek) he: (Hebrew) th: (Thai)
|
|
||||||
;Any other value will use Latin Extended (for most European languages and English).
|
;Any other value will use Latin Extended (for most European languages and English).
|
||||||
|
;language = ja
|
||||||
|
;note: this specifies the output format for OCR results. Can be "text" (default) or "json" (to include coordinates).
|
||||||
|
;output_format = text
|
||||||
;[winrtocr]
|
;[winrtocr]
|
||||||
;url = http://aaa.xxx.yyy.zzz:8000
|
;url = http://aaa.xxx.yyy.zzz:8000
|
||||||
;[oneocr]
|
;[oneocr]
|
||||||
|
|||||||
Reference in New Issue
Block a user